xref: /openbmc/linux/net/ipv4/devinet.c (revision 31b90347)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is used unparenthesized, so pass a plain pointer expression.
 */
#define IPV4_DEVCONF_DFLT(net, attr)					\
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 static DEFINE_SPINLOCK(inet_addr_hash_lock);
109 
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	spin_lock(&inet_addr_hash_lock);
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 	spin_unlock(&inet_addr_hash_lock);
124 }
125 
/*
 * Remove @ifa from the address hash table.  Writers are serialized by
 * inet_addr_hash_lock; the RCU "del_init" variant is used, so the node
 * is re-initialized after being unlinked.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
132 
133 /**
134  * __ip_dev_find - find the first device with a given source address.
135  * @net: the net namespace
136  * @addr: the source address
137  * @devref: if true, take a reference on the found device
138  *
139  * If a caller uses devref=false, it should be protected by RCU, or RTNL
140  */
141 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
142 {
143 	u32 hash = inet_addr_hash(net, addr);
144 	struct net_device *result = NULL;
145 	struct in_ifaddr *ifa;
146 
147 	rcu_read_lock();
148 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
149 		if (ifa->ifa_local == addr) {
150 			struct net_device *dev = ifa->ifa_dev->dev;
151 
152 			if (!net_eq(dev_net(dev), net))
153 				continue;
154 			result = dev;
155 			break;
156 		}
157 	}
158 	if (!result) {
159 		struct flowi4 fl4 = { .daddr = addr };
160 		struct fib_result res = { 0 };
161 		struct fib_table *local;
162 
163 		/* Fallback to FIB local table so that communication
164 		 * over loopback subnets work.
165 		 */
166 		local = fib_get_table(net, RT_TABLE_LOCAL);
167 		if (local &&
168 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169 		    res.type == RTN_LOCAL)
170 			result = FIB_RES_DEV(res);
171 	}
172 	if (result && devref)
173 		dev_hold(result);
174 	rcu_read_unlock();
175 	return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178 
/* Forward declaration; the definition is not in this part of the file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain called from this file with NETDEV_UP / NETDEV_DOWN and
 * the affected in_ifaddr whenever an address is added or removed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration; see the wrapper around __inet_del_ifa() below. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
/*
 * Per-device sysctl hooks: real implementations are only declared here
 * when CONFIG_SYSCTL is set, otherwise they are empty stubs so callers
 * need no #ifdef.
 */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
195 
196 /* Locks all the inet devices. */
197 
198 static struct in_ifaddr *inet_alloc_ifa(void)
199 {
200 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 }
202 
203 static void inet_rcu_free_ifa(struct rcu_head *head)
204 {
205 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206 	if (ifa->ifa_dev)
207 		in_dev_put(ifa->ifa_dev);
208 	kfree(ifa);
209 }
210 
/*
 * Release @ifa.  The actual free (and the in_device reference drop) is
 * deferred to inet_rcu_free_ifa() through call_rcu().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
215 
216 void in_dev_finish_destroy(struct in_device *idev)
217 {
218 	struct net_device *dev = idev->dev;
219 
220 	WARN_ON(idev->ifa_list);
221 	WARN_ON(idev->mc_list);
222 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
223 #ifdef NET_REFCNT_DEBUG
224 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
225 #endif
226 	dev_put(dev);
227 	if (!idev->dead)
228 		pr_err("Freeing alive in_device %p\n", idev);
229 	else
230 		kfree(idev);
231 }
232 EXPORT_SYMBOL(in_dev_finish_destroy);
233 
234 static struct in_device *inetdev_init(struct net_device *dev)
235 {
236 	struct in_device *in_dev;
237 
238 	ASSERT_RTNL();
239 
240 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 	if (!in_dev)
242 		goto out;
243 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 			sizeof(in_dev->cnf));
245 	in_dev->cnf.sysctl = NULL;
246 	in_dev->dev = dev;
247 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 	if (!in_dev->arp_parms)
249 		goto out_kfree;
250 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 		dev_disable_lro(dev);
252 	/* Reference in_dev->dev */
253 	dev_hold(dev);
254 	/* Account for reference dev->ip_ptr (below) */
255 	in_dev_hold(in_dev);
256 
257 	devinet_sysctl_register(in_dev);
258 	ip_mc_init_dev(in_dev);
259 	if (dev->flags & IFF_UP)
260 		ip_mc_up(in_dev);
261 
262 	/* we can receive as soon as ip_ptr is set -- do this last */
263 	rcu_assign_pointer(dev->ip_ptr, in_dev);
264 out:
265 	return in_dev;
266 out_kfree:
267 	kfree(in_dev);
268 	in_dev = NULL;
269 	goto out;
270 }
271 
272 static void in_dev_rcu_put(struct rcu_head *head)
273 {
274 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
275 	in_dev_put(idev);
276 }
277 
278 static void inetdev_destroy(struct in_device *in_dev)
279 {
280 	struct in_ifaddr *ifa;
281 	struct net_device *dev;
282 
283 	ASSERT_RTNL();
284 
285 	dev = in_dev->dev;
286 
287 	in_dev->dead = 1;
288 
289 	ip_mc_destroy_dev(in_dev);
290 
291 	while ((ifa = in_dev->ifa_list) != NULL) {
292 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
293 		inet_free_ifa(ifa);
294 	}
295 
296 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
297 
298 	devinet_sysctl_unregister(in_dev);
299 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
300 	arp_ifdown(dev);
301 
302 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
303 }
304 
305 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
306 {
307 	rcu_read_lock();
308 	for_primary_ifa(in_dev) {
309 		if (inet_ifa_match(a, ifa)) {
310 			if (!b || inet_ifa_match(b, ifa)) {
311 				rcu_read_unlock();
312 				return 1;
313 			}
314 		}
315 	} endfor_ifa(in_dev);
316 	rcu_read_unlock();
317 	return 0;
318 }
319 
/*
 * __inet_del_ifa - unlink one address from an in_device's address list.
 * @in_dev:  device whose ifa_list contains the address
 * @ifap:    the link (pointer to the "next" pointer, or to the list head)
 *           that currently points at the address to remove
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     passed through to rtmsg_ifa() for the notifications
 * @portid:  passed through to rtmsg_ifa() for the notifications
 *
 * Must be called with RTNL held.
 *
 * When the removed address is a primary, its secondaries (same mask,
 * same subnet) are either deleted as well or, if secondary promotion is
 * enabled on the device, the first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	/* first matching secondary, set only when promotion is enabled */
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* insertion point for a promoted address; starts at the list head */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* entry directly in front of "promote" (NULL: it follows ifa1) */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* remember the last primary of suitable scope */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* skip anything that is not a secondary of ifa1 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* no promotion: drop the secondary right away */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* first matching secondary becomes the new primary */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* saved before the relinking below may change ifa_next */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* move "promote" right behind last_prim; when prev_prom is
		 * NULL it directly followed ifa1 and is left where it is
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* re-add routes of the remaining secondaries of this subnet */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
419 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request at hand: passes a NULL nlmsghdr and port id 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
425 
/* Work handler that expires/deprecates addresses; defined further down. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
429 
430 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
431 			     u32 portid)
432 {
433 	struct in_device *in_dev = ifa->ifa_dev;
434 	struct in_ifaddr *ifa1, **ifap, **last_primary;
435 
436 	ASSERT_RTNL();
437 
438 	if (!ifa->ifa_local) {
439 		inet_free_ifa(ifa);
440 		return 0;
441 	}
442 
443 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
444 	last_primary = &in_dev->ifa_list;
445 
446 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
447 	     ifap = &ifa1->ifa_next) {
448 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
449 		    ifa->ifa_scope <= ifa1->ifa_scope)
450 			last_primary = &ifa1->ifa_next;
451 		if (ifa1->ifa_mask == ifa->ifa_mask &&
452 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
453 			if (ifa1->ifa_local == ifa->ifa_local) {
454 				inet_free_ifa(ifa);
455 				return -EEXIST;
456 			}
457 			if (ifa1->ifa_scope != ifa->ifa_scope) {
458 				inet_free_ifa(ifa);
459 				return -EINVAL;
460 			}
461 			ifa->ifa_flags |= IFA_F_SECONDARY;
462 		}
463 	}
464 
465 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
466 		net_srandom(ifa->ifa_local);
467 		ifap = last_primary;
468 	}
469 
470 	ifa->ifa_next = *ifap;
471 	*ifap = ifa;
472 
473 	inet_hash_insert(dev_net(in_dev->dev), ifa);
474 
475 	cancel_delayed_work(&check_lifetime_work);
476 	schedule_delayed_work(&check_lifetime_work, 0);
477 
478 	/* Send message first, then call notifier.
479 	   Notifier will trigger FIB update, so that
480 	   listeners of netlink will know about new ifaddr */
481 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
482 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
483 
484 	return 0;
485 }
486 
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have
 * no netlink request at hand: passes a NULL nlmsghdr and port id 0.
 * Returns whatever __inet_insert_ifa() returns.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
491 
492 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
493 {
494 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
495 
496 	ASSERT_RTNL();
497 
498 	if (!in_dev) {
499 		inet_free_ifa(ifa);
500 		return -ENOBUFS;
501 	}
502 	ipv4_devconf_setall(in_dev);
503 	if (ifa->ifa_dev != in_dev) {
504 		WARN_ON(ifa->ifa_dev);
505 		in_dev_hold(in_dev);
506 		ifa->ifa_dev = in_dev;
507 	}
508 	if (ipv4_is_loopback(ifa->ifa_local))
509 		ifa->ifa_scope = RT_SCOPE_HOST;
510 	return inet_insert_ifa(ifa);
511 }
512 
513 /* Caller must hold RCU or RTNL :
514  * We dont take a reference on found in_device
515  */
516 struct in_device *inetdev_by_index(struct net *net, int ifindex)
517 {
518 	struct net_device *dev;
519 	struct in_device *in_dev = NULL;
520 
521 	rcu_read_lock();
522 	dev = dev_get_by_index_rcu(net, ifindex);
523 	if (dev)
524 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
525 	rcu_read_unlock();
526 	return in_dev;
527 }
528 EXPORT_SYMBOL(inetdev_by_index);
529 
530 /* Called only from RTNL semaphored context. No locks. */
531 
532 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
533 				    __be32 mask)
534 {
535 	ASSERT_RTNL();
536 
537 	for_primary_ifa(in_dev) {
538 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
539 			return ifa;
540 	} endfor_ifa(in_dev);
541 	return NULL;
542 }
543 
544 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
545 {
546 	struct net *net = sock_net(skb->sk);
547 	struct nlattr *tb[IFA_MAX+1];
548 	struct in_device *in_dev;
549 	struct ifaddrmsg *ifm;
550 	struct in_ifaddr *ifa, **ifap;
551 	int err = -EINVAL;
552 
553 	ASSERT_RTNL();
554 
555 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
556 	if (err < 0)
557 		goto errout;
558 
559 	ifm = nlmsg_data(nlh);
560 	in_dev = inetdev_by_index(net, ifm->ifa_index);
561 	if (in_dev == NULL) {
562 		err = -ENODEV;
563 		goto errout;
564 	}
565 
566 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
567 	     ifap = &ifa->ifa_next) {
568 		if (tb[IFA_LOCAL] &&
569 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
570 			continue;
571 
572 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
573 			continue;
574 
575 		if (tb[IFA_ADDRESS] &&
576 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
577 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
578 			continue;
579 
580 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
581 		return 0;
582 	}
583 
584 	err = -EADDRNOTAVAIL;
585 errout:
586 	return err;
587 }
588 
/* Lifetime value that the code below treats as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF

/*
 * check_lifetime - delayed-work handler that ages IPv4 addresses.
 * @work: the work item (unused)
 *
 * Every hash bucket is processed in two passes:
 *  1. under rcu_read_lock() only, decide whether any address in the
 *     bucket needs a change and compute the next time something is due;
 *  2. only if a change is needed, take RTNL and actually delete expired
 *     addresses or mark them IFA_F_DEPRECATED.
 * Addresses flagged IFA_F_PERMANENT are ignored.  At the end the work
 * re-arms itself for the earliest upcoming deadline.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* latest time at which the next run is wanted */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* pass 1: read-only scan under RCU */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			/* age is in seconds, lifetimes are in seconds too */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* valid lifetime is over: must be deleted */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* past preferred: next event is its expiry */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* still preferred: next event is deprecation */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* pass 2: apply the changes with RTNL held */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* find the list link that points at ifa,
				 * as inet_del_ifa() needs it
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	schedule_delayed_work(&check_lifetime_work, next_sched - now);
}
687 
688 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
689 			     __u32 prefered_lft)
690 {
691 	unsigned long timeout;
692 
693 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
694 
695 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
696 	if (addrconf_finite_timeout(timeout))
697 		ifa->ifa_valid_lft = timeout;
698 	else
699 		ifa->ifa_flags |= IFA_F_PERMANENT;
700 
701 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702 	if (addrconf_finite_timeout(timeout)) {
703 		if (timeout == 0)
704 			ifa->ifa_flags |= IFA_F_DEPRECATED;
705 		ifa->ifa_preferred_lft = timeout;
706 	}
707 	ifa->ifa_tstamp = jiffies;
708 	if (!ifa->ifa_cstamp)
709 		ifa->ifa_cstamp = ifa->ifa_tstamp;
710 }
711 
712 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
714 {
715 	struct nlattr *tb[IFA_MAX+1];
716 	struct in_ifaddr *ifa;
717 	struct ifaddrmsg *ifm;
718 	struct net_device *dev;
719 	struct in_device *in_dev;
720 	int err;
721 
722 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
723 	if (err < 0)
724 		goto errout;
725 
726 	ifm = nlmsg_data(nlh);
727 	err = -EINVAL;
728 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
729 		goto errout;
730 
731 	dev = __dev_get_by_index(net, ifm->ifa_index);
732 	err = -ENODEV;
733 	if (dev == NULL)
734 		goto errout;
735 
736 	in_dev = __in_dev_get_rtnl(dev);
737 	err = -ENOBUFS;
738 	if (in_dev == NULL)
739 		goto errout;
740 
741 	ifa = inet_alloc_ifa();
742 	if (ifa == NULL)
743 		/*
744 		 * A potential indev allocation can be left alive, it stays
745 		 * assigned to its device and is destroy with it.
746 		 */
747 		goto errout;
748 
749 	ipv4_devconf_setall(in_dev);
750 	in_dev_hold(in_dev);
751 
752 	if (tb[IFA_ADDRESS] == NULL)
753 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
754 
755 	INIT_HLIST_NODE(&ifa->hash);
756 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
757 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
758 	ifa->ifa_flags = ifm->ifa_flags;
759 	ifa->ifa_scope = ifm->ifa_scope;
760 	ifa->ifa_dev = in_dev;
761 
762 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
763 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
764 
765 	if (tb[IFA_BROADCAST])
766 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
767 
768 	if (tb[IFA_LABEL])
769 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
770 	else
771 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
772 
773 	if (tb[IFA_CACHEINFO]) {
774 		struct ifa_cacheinfo *ci;
775 
776 		ci = nla_data(tb[IFA_CACHEINFO]);
777 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
778 			err = -EINVAL;
779 			goto errout_free;
780 		}
781 		*pvalid_lft = ci->ifa_valid;
782 		*pprefered_lft = ci->ifa_prefered;
783 	}
784 
785 	return ifa;
786 
787 errout_free:
788 	inet_free_ifa(ifa);
789 errout:
790 	return ERR_PTR(err);
791 }
792 
793 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
794 {
795 	struct in_device *in_dev = ifa->ifa_dev;
796 	struct in_ifaddr *ifa1, **ifap;
797 
798 	if (!ifa->ifa_local)
799 		return NULL;
800 
801 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
802 	     ifap = &ifa1->ifa_next) {
803 		if (ifa1->ifa_mask == ifa->ifa_mask &&
804 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
805 		    ifa1->ifa_local == ifa->ifa_local)
806 			return ifa1;
807 	}
808 	return NULL;
809 }
810 
811 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
812 {
813 	struct net *net = sock_net(skb->sk);
814 	struct in_ifaddr *ifa;
815 	struct in_ifaddr *ifa_existing;
816 	__u32 valid_lft = INFINITY_LIFE_TIME;
817 	__u32 prefered_lft = INFINITY_LIFE_TIME;
818 
819 	ASSERT_RTNL();
820 
821 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
822 	if (IS_ERR(ifa))
823 		return PTR_ERR(ifa);
824 
825 	ifa_existing = find_matching_ifa(ifa);
826 	if (!ifa_existing) {
827 		/* It would be best to check for !NLM_F_CREATE here but
828 		 * userspace alreay relies on not having to provide this.
829 		 */
830 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
831 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
832 	} else {
833 		inet_free_ifa(ifa);
834 
835 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
836 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
837 			return -EEXIST;
838 		ifa = ifa_existing;
839 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
840 		cancel_delayed_work(&check_lifetime_work);
841 		schedule_delayed_work(&check_lifetime_work, 0);
842 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
843 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
844 	}
845 	return 0;
846 }
847 
848 /*
849  *	Determine a default network mask, based on the IP address.
850  */
851 
852 static int inet_abc_len(__be32 addr)
853 {
854 	int rc = -1;	/* Something else, probably a multicast. */
855 
856 	if (ipv4_is_zeronet(addr))
857 		rc = 0;
858 	else {
859 		__u32 haddr = ntohl(addr);
860 
861 		if (IN_CLASSA(haddr))
862 			rc = 8;
863 		else if (IN_CLASSB(haddr))
864 			rc = 16;
865 		else if (IN_CLASSC(haddr))
866 			rc = 24;
867 	}
868 
869 	return rc;
870 }
871 
872 
/*
 * devinet_ioctl - handle the IPv4 address ioctls of an interface.
 * @net: namespace used for the device lookup and the capability check
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value yields -EINVAL
 * @arg: user pointer to a struct ifreq; read on entry and, for the
 *       "get" commands, written back on success
 *
 * The interface name may carry an alias suffix ("name:x"): the part
 * before the colon selects the device, the full string is compared
 * against the address labels.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (a "set" command without CAP_NET_ADMIN), -EINVAL (unknown
 * command, wrong address family, bad address or mask), -ENODEV (no such
 * device), -EADDRNOTAVAIL (no matching address), -ENOBUFS (allocation
 * failure or no in_device), or the result of dev_change_flags() /
 * inet_set_ifa().
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* never trust user space to terminate the name */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* temporarily cut off the alias suffix for the device lookup */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	/* first switch: validate the command and check permissions */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* the reply is built in place in ifr */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* restore the full (possibly aliased) name for label matching */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an address */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* second switch: perform the operation */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* on an alias, clearing IFF_UP deletes the address;
			 * setting it is accepted and changes nothing
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* no address under this label yet: create one */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* reuse the existing entry: unlink it (without
			 * freeing), modify it and insert it again below
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* classful default mask, plus derived broadcast */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* unlink, modify, re-insert */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		/* unlink, modify, re-insert */
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "get" commands: drop RTNL first, then copy the reply out */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1114 
1115 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1116 {
1117 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1118 	struct in_ifaddr *ifa;
1119 	struct ifreq ifr;
1120 	int done = 0;
1121 
1122 	if (!in_dev)
1123 		goto out;
1124 
1125 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1126 		if (!buf) {
1127 			done += sizeof(ifr);
1128 			continue;
1129 		}
1130 		if (len < (int) sizeof(ifr))
1131 			break;
1132 		memset(&ifr, 0, sizeof(struct ifreq));
1133 		strcpy(ifr.ifr_name, ifa->ifa_label);
1134 
1135 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1136 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1137 								ifa->ifa_local;
1138 
1139 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1140 			done = -EFAULT;
1141 			break;
1142 		}
1143 		buf  += sizeof(struct ifreq);
1144 		len  -= sizeof(struct ifreq);
1145 		done += sizeof(struct ifreq);
1146 	}
1147 out:
1148 	return done;
1149 }
1150 
1151 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1152 {
1153 	__be32 addr = 0;
1154 	struct in_device *in_dev;
1155 	struct net *net = dev_net(dev);
1156 
1157 	rcu_read_lock();
1158 	in_dev = __in_dev_get_rcu(dev);
1159 	if (!in_dev)
1160 		goto no_in_dev;
1161 
1162 	for_primary_ifa(in_dev) {
1163 		if (ifa->ifa_scope > scope)
1164 			continue;
1165 		if (!dst || inet_ifa_match(dst, ifa)) {
1166 			addr = ifa->ifa_local;
1167 			break;
1168 		}
1169 		if (!addr)
1170 			addr = ifa->ifa_local;
1171 	} endfor_ifa(in_dev);
1172 
1173 	if (addr)
1174 		goto out_unlock;
1175 no_in_dev:
1176 
1177 	/* Not loopback addresses on loopback should be preferred
1178 	   in this case. It is importnat that lo is the first interface
1179 	   in dev_base list.
1180 	 */
1181 	for_each_netdev_rcu(net, dev) {
1182 		in_dev = __in_dev_get_rcu(dev);
1183 		if (!in_dev)
1184 			continue;
1185 
1186 		for_primary_ifa(in_dev) {
1187 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1188 			    ifa->ifa_scope <= scope) {
1189 				addr = ifa->ifa_local;
1190 				goto out_unlock;
1191 			}
1192 		} endfor_ifa(in_dev);
1193 	}
1194 out_unlock:
1195 	rcu_read_unlock();
1196 	return addr;
1197 }
1198 EXPORT_SYMBOL(inet_select_addr);
1199 
1200 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1201 			      __be32 local, int scope)
1202 {
1203 	int same = 0;
1204 	__be32 addr = 0;
1205 
1206 	for_ifa(in_dev) {
1207 		if (!addr &&
1208 		    (local == ifa->ifa_local || !local) &&
1209 		    ifa->ifa_scope <= scope) {
1210 			addr = ifa->ifa_local;
1211 			if (same)
1212 				break;
1213 		}
1214 		if (!same) {
1215 			same = (!local || inet_ifa_match(local, ifa)) &&
1216 				(!dst || inet_ifa_match(dst, ifa));
1217 			if (same && addr) {
1218 				if (local || !dst)
1219 					break;
1220 				/* Is the selected addr into dst subnet? */
1221 				if (inet_ifa_match(addr, ifa))
1222 					break;
1223 				/* No, then can we use new local src? */
1224 				if (ifa->ifa_scope <= scope) {
1225 					addr = ifa->ifa_local;
1226 					break;
1227 				}
1228 				/* search for large dst subnet for addr */
1229 				same = 0;
1230 			}
1231 		}
1232 	} endfor_ifa(in_dev);
1233 
1234 	return same ? addr : 0;
1235 }
1236 
1237 /*
1238  * Confirm that local IP address exists using wildcards:
1239  * - in_dev: only on this interface, 0=any interface
1240  * - dst: only in the same subnet as dst, 0=any dst
1241  * - local: address, 0=autoselect the local address
1242  * - scope: maximum allowed scope value for the local address
1243  */
1244 __be32 inet_confirm_addr(struct in_device *in_dev,
1245 			 __be32 dst, __be32 local, int scope)
1246 {
1247 	__be32 addr = 0;
1248 	struct net_device *dev;
1249 	struct net *net;
1250 
1251 	if (scope != RT_SCOPE_LINK)
1252 		return confirm_addr_indev(in_dev, dst, local, scope);
1253 
1254 	net = dev_net(in_dev->dev);
1255 	rcu_read_lock();
1256 	for_each_netdev_rcu(net, dev) {
1257 		in_dev = __in_dev_get_rcu(dev);
1258 		if (in_dev) {
1259 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1260 			if (addr)
1261 				break;
1262 		}
1263 	}
1264 	rcu_read_unlock();
1265 
1266 	return addr;
1267 }
1268 EXPORT_SYMBOL(inet_confirm_addr);
1269 
1270 /*
1271  *	Device notifier
1272  */
1273 
1274 int register_inetaddr_notifier(struct notifier_block *nb)
1275 {
1276 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1277 }
1278 EXPORT_SYMBOL(register_inetaddr_notifier);
1279 
1280 int unregister_inetaddr_notifier(struct notifier_block *nb)
1281 {
1282 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1283 }
1284 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1285 
1286 /* Rename ifa_labels for a device name change. Make some effort to preserve
1287  * existing alias numbering and to create unique labels if possible.
1288 */
1289 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1290 {
1291 	struct in_ifaddr *ifa;
1292 	int named = 0;
1293 
1294 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1295 		char old[IFNAMSIZ], *dot;
1296 
1297 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1298 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1299 		if (named++ == 0)
1300 			goto skip;
1301 		dot = strchr(old, ':');
1302 		if (dot == NULL) {
1303 			sprintf(old, ":%d", named);
1304 			dot = old;
1305 		}
1306 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1307 			strcat(ifa->ifa_label, dot);
1308 		else
1309 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1310 skip:
1311 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1312 	}
1313 }
1314 
/* Tell whether @mtu is large enough (at least 68) for IPv4 to be kept
 * enabled on a device.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1319 
1320 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1321 					struct in_device *in_dev)
1322 
1323 {
1324 	struct in_ifaddr *ifa;
1325 
1326 	for (ifa = in_dev->ifa_list; ifa;
1327 	     ifa = ifa->ifa_next) {
1328 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1329 			 ifa->ifa_local, dev,
1330 			 ifa->ifa_local, NULL,
1331 			 dev->dev_addr, NULL);
1332 	}
1333 }
1334 
1335 /* Called only under RTNL semaphore */
1336 
1337 static int inetdev_event(struct notifier_block *this, unsigned long event,
1338 			 void *ptr)
1339 {
1340 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1341 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1342 
1343 	ASSERT_RTNL();
1344 
1345 	if (!in_dev) {
1346 		if (event == NETDEV_REGISTER) {
1347 			in_dev = inetdev_init(dev);
1348 			if (!in_dev)
1349 				return notifier_from_errno(-ENOMEM);
1350 			if (dev->flags & IFF_LOOPBACK) {
1351 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1352 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1353 			}
1354 		} else if (event == NETDEV_CHANGEMTU) {
1355 			/* Re-enabling IP */
1356 			if (inetdev_valid_mtu(dev->mtu))
1357 				in_dev = inetdev_init(dev);
1358 		}
1359 		goto out;
1360 	}
1361 
1362 	switch (event) {
1363 	case NETDEV_REGISTER:
1364 		pr_debug("%s: bug\n", __func__);
1365 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1366 		break;
1367 	case NETDEV_UP:
1368 		if (!inetdev_valid_mtu(dev->mtu))
1369 			break;
1370 		if (dev->flags & IFF_LOOPBACK) {
1371 			struct in_ifaddr *ifa = inet_alloc_ifa();
1372 
1373 			if (ifa) {
1374 				INIT_HLIST_NODE(&ifa->hash);
1375 				ifa->ifa_local =
1376 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1377 				ifa->ifa_prefixlen = 8;
1378 				ifa->ifa_mask = inet_make_mask(8);
1379 				in_dev_hold(in_dev);
1380 				ifa->ifa_dev = in_dev;
1381 				ifa->ifa_scope = RT_SCOPE_HOST;
1382 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1383 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1384 						 INFINITY_LIFE_TIME);
1385 				inet_insert_ifa(ifa);
1386 			}
1387 		}
1388 		ip_mc_up(in_dev);
1389 		/* fall through */
1390 	case NETDEV_CHANGEADDR:
1391 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1392 			break;
1393 		/* fall through */
1394 	case NETDEV_NOTIFY_PEERS:
1395 		/* Send gratuitous ARP to notify of link change */
1396 		inetdev_send_gratuitous_arp(dev, in_dev);
1397 		break;
1398 	case NETDEV_DOWN:
1399 		ip_mc_down(in_dev);
1400 		break;
1401 	case NETDEV_PRE_TYPE_CHANGE:
1402 		ip_mc_unmap(in_dev);
1403 		break;
1404 	case NETDEV_POST_TYPE_CHANGE:
1405 		ip_mc_remap(in_dev);
1406 		break;
1407 	case NETDEV_CHANGEMTU:
1408 		if (inetdev_valid_mtu(dev->mtu))
1409 			break;
1410 		/* disable IP when MTU is not enough */
1411 	case NETDEV_UNREGISTER:
1412 		inetdev_destroy(in_dev);
1413 		break;
1414 	case NETDEV_CHANGENAME:
1415 		/* Do not notify about label change, this event is
1416 		 * not interesting to applications using netlink.
1417 		 */
1418 		inetdev_changename(dev, in_dev);
1419 
1420 		devinet_sysctl_unregister(in_dev);
1421 		devinet_sysctl_register(in_dev);
1422 		break;
1423 	}
1424 out:
1425 	return NOTIFY_DONE;
1426 }
1427 
/* Notifier block whose callback is inetdev_event(). */
1428 static struct notifier_block ip_netdev_notifier = {
1429 	.notifier_call = inetdev_event,
1430 };
1431 
1432 static size_t inet_nlmsg_size(void)
1433 {
1434 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1435 	       + nla_total_size(4) /* IFA_ADDRESS */
1436 	       + nla_total_size(4) /* IFA_LOCAL */
1437 	       + nla_total_size(4) /* IFA_BROADCAST */
1438 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1439 }
1440 
1441 static inline u32 cstamp_delta(unsigned long cstamp)
1442 {
1443 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1444 }
1445 
1446 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1447 			 unsigned long tstamp, u32 preferred, u32 valid)
1448 {
1449 	struct ifa_cacheinfo ci;
1450 
1451 	ci.cstamp = cstamp_delta(cstamp);
1452 	ci.tstamp = cstamp_delta(tstamp);
1453 	ci.ifa_prefered = preferred;
1454 	ci.ifa_valid = valid;
1455 
1456 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1457 }
1458 
1459 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1460 			    u32 portid, u32 seq, int event, unsigned int flags)
1461 {
1462 	struct ifaddrmsg *ifm;
1463 	struct nlmsghdr  *nlh;
1464 	u32 preferred, valid;
1465 
1466 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1467 	if (nlh == NULL)
1468 		return -EMSGSIZE;
1469 
1470 	ifm = nlmsg_data(nlh);
1471 	ifm->ifa_family = AF_INET;
1472 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1473 	ifm->ifa_flags = ifa->ifa_flags;
1474 	ifm->ifa_scope = ifa->ifa_scope;
1475 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1476 
1477 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1478 		preferred = ifa->ifa_preferred_lft;
1479 		valid = ifa->ifa_valid_lft;
1480 		if (preferred != INFINITY_LIFE_TIME) {
1481 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1482 
1483 			if (preferred > tval)
1484 				preferred -= tval;
1485 			else
1486 				preferred = 0;
1487 			if (valid != INFINITY_LIFE_TIME) {
1488 				if (valid > tval)
1489 					valid -= tval;
1490 				else
1491 					valid = 0;
1492 			}
1493 		}
1494 	} else {
1495 		preferred = INFINITY_LIFE_TIME;
1496 		valid = INFINITY_LIFE_TIME;
1497 	}
1498 	if ((ifa->ifa_address &&
1499 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1500 	    (ifa->ifa_local &&
1501 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1502 	    (ifa->ifa_broadcast &&
1503 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1504 	    (ifa->ifa_label[0] &&
1505 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1506 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1507 			  preferred, valid))
1508 		goto nla_put_failure;
1509 
1510 	return nlmsg_end(skb, nlh);
1511 
1512 nla_put_failure:
1513 	nlmsg_cancel(skb, nlh);
1514 	return -EMSGSIZE;
1515 }
1516 
/*
 * Netlink dump callback: emit one RTM_NEWADDR (NLM_F_MULTI) message per
 * IPv4 address of every device in the socket's network namespace.
 *
 * The dump is resumable.  cb->args[] carries the position across calls:
 *   args[0] - device index hash bucket
 *   args[1] - position of the device within that bucket
 *   args[2] - position of the address within that device's list
 * Returns skb->len.
 */
1517 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1518 {
1519 	struct net *net = sock_net(skb->sk);
1520 	int h, s_h;
1521 	int idx, s_idx;
1522 	int ip_idx, s_ip_idx;
1523 	struct net_device *dev;
1524 	struct in_device *in_dev;
1525 	struct in_ifaddr *ifa;
1526 	struct hlist_head *head;
1527 
	/* Restore the position saved by the previous invocation. */
1528 	s_h = cb->args[0];
1529 	s_idx = idx = cb->args[1];
1530 	s_ip_idx = ip_idx = cb->args[2];
1531 
	/* s_idx only applies to the first bucket visited; later ones start at 0. */
1532 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1533 		idx = 0;
1534 		head = &net->dev_index_head[h];
1535 		rcu_read_lock();
		/* Sequence value checked by nl_dump_check_consistent() below. */
1536 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1537 			  net->dev_base_seq;
1538 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in an earlier call. */
1539 			if (idx < s_idx)
1540 				goto cont;
			/* Past the resume device: start from its first address. */
1541 			if (h > s_h || idx > s_idx)
1542 				s_ip_idx = 0;
1543 			in_dev = __in_dev_get_rcu(dev);
1544 			if (!in_dev)
1545 				goto cont;
1546 
1547 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1548 			     ifa = ifa->ifa_next, ip_idx++) {
1549 				if (ip_idx < s_ip_idx)
1550 					continue;
				/* Fill failed: stop and save the position at "done". */
1551 				if (inet_fill_ifaddr(skb, ifa,
1552 					     NETLINK_CB(cb->skb).portid,
1553 					     cb->nlh->nlmsg_seq,
1554 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1555 					rcu_read_unlock();
1556 					goto done;
1557 				}
1558 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1559 			}
1560 cont:
1561 			idx++;
1562 		}
1563 		rcu_read_unlock();
1564 	}
1565 
1566 done:
	/* Save the current position for the next invocation. */
1567 	cb->args[0] = h;
1568 	cb->args[1] = idx;
1569 	cb->args[2] = ip_idx;
1570 
1571 	return skb->len;
1572 }
1573 
1574 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1575 		      u32 portid)
1576 {
1577 	struct sk_buff *skb;
1578 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1579 	int err = -ENOBUFS;
1580 	struct net *net;
1581 
1582 	net = dev_net(ifa->ifa_dev->dev);
1583 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1584 	if (skb == NULL)
1585 		goto errout;
1586 
1587 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1588 	if (err < 0) {
1589 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1590 		WARN_ON(err == -EMSGSIZE);
1591 		kfree_skb(skb);
1592 		goto errout;
1593 	}
1594 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1595 	return;
1596 errout:
1597 	if (err < 0)
1598 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1599 }
1600 
1601 static size_t inet_get_link_af_size(const struct net_device *dev)
1602 {
1603 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1604 
1605 	if (!in_dev)
1606 		return 0;
1607 
1608 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1609 }
1610 
1611 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1612 {
1613 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1614 	struct nlattr *nla;
1615 	int i;
1616 
1617 	if (!in_dev)
1618 		return -ENODATA;
1619 
1620 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1621 	if (nla == NULL)
1622 		return -EMSGSIZE;
1623 
1624 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1625 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1626 
1627 	return 0;
1628 }
1629 
/* Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must
 * be a nested attribute.
 */
1630 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1631 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1632 };
1633 
1634 static int inet_validate_link_af(const struct net_device *dev,
1635 				 const struct nlattr *nla)
1636 {
1637 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1638 	int err, rem;
1639 
1640 	if (dev && !__in_dev_get_rtnl(dev))
1641 		return -EAFNOSUPPORT;
1642 
1643 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1644 	if (err < 0)
1645 		return err;
1646 
1647 	if (tb[IFLA_INET_CONF]) {
1648 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1649 			int cfgid = nla_type(a);
1650 
1651 			if (nla_len(a) < 4)
1652 				return -EINVAL;
1653 
1654 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1655 				return -EINVAL;
1656 		}
1657 	}
1658 
1659 	return 0;
1660 }
1661 
1662 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1663 {
1664 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1665 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1666 	int rem;
1667 
1668 	if (!in_dev)
1669 		return -EAFNOSUPPORT;
1670 
1671 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1672 		BUG();
1673 
1674 	if (tb[IFLA_INET_CONF]) {
1675 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1676 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1677 	}
1678 
1679 	return 0;
1680 }
1681 
1682 static int inet_netconf_msgsize_devconf(int type)
1683 {
1684 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1685 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1686 
1687 	/* type -1 is used for ALL */
1688 	if (type == -1 || type == NETCONFA_FORWARDING)
1689 		size += nla_total_size(4);
1690 	if (type == -1 || type == NETCONFA_RP_FILTER)
1691 		size += nla_total_size(4);
1692 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1693 		size += nla_total_size(4);
1694 
1695 	return size;
1696 }
1697 
1698 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1699 				     struct ipv4_devconf *devconf, u32 portid,
1700 				     u32 seq, int event, unsigned int flags,
1701 				     int type)
1702 {
1703 	struct nlmsghdr  *nlh;
1704 	struct netconfmsg *ncm;
1705 
1706 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1707 			flags);
1708 	if (nlh == NULL)
1709 		return -EMSGSIZE;
1710 
1711 	ncm = nlmsg_data(nlh);
1712 	ncm->ncm_family = AF_INET;
1713 
1714 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1715 		goto nla_put_failure;
1716 
1717 	/* type -1 is used for ALL */
1718 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1719 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1720 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1721 		goto nla_put_failure;
1722 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1723 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1724 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1725 		goto nla_put_failure;
1726 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1727 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1728 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1729 		goto nla_put_failure;
1730 
1731 	return nlmsg_end(skb, nlh);
1732 
1733 nla_put_failure:
1734 	nlmsg_cancel(skb, nlh);
1735 	return -EMSGSIZE;
1736 }
1737 
1738 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1739 				 struct ipv4_devconf *devconf)
1740 {
1741 	struct sk_buff *skb;
1742 	int err = -ENOBUFS;
1743 
1744 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1745 	if (skb == NULL)
1746 		goto errout;
1747 
1748 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1749 					RTM_NEWNETCONF, 0, type);
1750 	if (err < 0) {
1751 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1752 		WARN_ON(err == -EMSGSIZE);
1753 		kfree_skb(skb);
1754 		goto errout;
1755 	}
1756 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1757 	return;
1758 errout:
1759 	if (err < 0)
1760 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1761 }
1762 
/* Attribute policy passed to nlmsg_parse() in inet_netconf_get_devconf();
 * each listed attribute is declared with a length of sizeof(int).
 */
1763 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1764 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1765 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1766 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1767 };
1768 
1769 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1770 				    struct nlmsghdr *nlh)
1771 {
1772 	struct net *net = sock_net(in_skb->sk);
1773 	struct nlattr *tb[NETCONFA_MAX+1];
1774 	struct netconfmsg *ncm;
1775 	struct sk_buff *skb;
1776 	struct ipv4_devconf *devconf;
1777 	struct in_device *in_dev;
1778 	struct net_device *dev;
1779 	int ifindex;
1780 	int err;
1781 
1782 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1783 			  devconf_ipv4_policy);
1784 	if (err < 0)
1785 		goto errout;
1786 
1787 	err = EINVAL;
1788 	if (!tb[NETCONFA_IFINDEX])
1789 		goto errout;
1790 
1791 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1792 	switch (ifindex) {
1793 	case NETCONFA_IFINDEX_ALL:
1794 		devconf = net->ipv4.devconf_all;
1795 		break;
1796 	case NETCONFA_IFINDEX_DEFAULT:
1797 		devconf = net->ipv4.devconf_dflt;
1798 		break;
1799 	default:
1800 		dev = __dev_get_by_index(net, ifindex);
1801 		if (dev == NULL)
1802 			goto errout;
1803 		in_dev = __in_dev_get_rtnl(dev);
1804 		if (in_dev == NULL)
1805 			goto errout;
1806 		devconf = &in_dev->cnf;
1807 		break;
1808 	}
1809 
1810 	err = -ENOBUFS;
1811 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1812 	if (skb == NULL)
1813 		goto errout;
1814 
1815 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1816 					NETLINK_CB(in_skb).portid,
1817 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1818 					-1);
1819 	if (err < 0) {
1820 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1821 		WARN_ON(err == -EMSGSIZE);
1822 		kfree_skb(skb);
1823 		goto errout;
1824 	}
1825 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1826 errout:
1827 	return err;
1828 }
1829 
1830 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1831 				     struct netlink_callback *cb)
1832 {
1833 	struct net *net = sock_net(skb->sk);
1834 	int h, s_h;
1835 	int idx, s_idx;
1836 	struct net_device *dev;
1837 	struct in_device *in_dev;
1838 	struct hlist_head *head;
1839 
1840 	s_h = cb->args[0];
1841 	s_idx = idx = cb->args[1];
1842 
1843 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1844 		idx = 0;
1845 		head = &net->dev_index_head[h];
1846 		rcu_read_lock();
1847 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1848 			  net->dev_base_seq;
1849 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1850 			if (idx < s_idx)
1851 				goto cont;
1852 			in_dev = __in_dev_get_rcu(dev);
1853 			if (!in_dev)
1854 				goto cont;
1855 
1856 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1857 						      &in_dev->cnf,
1858 						      NETLINK_CB(cb->skb).portid,
1859 						      cb->nlh->nlmsg_seq,
1860 						      RTM_NEWNETCONF,
1861 						      NLM_F_MULTI,
1862 						      -1) <= 0) {
1863 				rcu_read_unlock();
1864 				goto done;
1865 			}
1866 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1867 cont:
1868 			idx++;
1869 		}
1870 		rcu_read_unlock();
1871 	}
1872 	if (h == NETDEV_HASHENTRIES) {
1873 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1874 					      net->ipv4.devconf_all,
1875 					      NETLINK_CB(cb->skb).portid,
1876 					      cb->nlh->nlmsg_seq,
1877 					      RTM_NEWNETCONF, NLM_F_MULTI,
1878 					      -1) <= 0)
1879 			goto done;
1880 		else
1881 			h++;
1882 	}
1883 	if (h == NETDEV_HASHENTRIES + 1) {
1884 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1885 					      net->ipv4.devconf_dflt,
1886 					      NETLINK_CB(cb->skb).portid,
1887 					      cb->nlh->nlmsg_seq,
1888 					      RTM_NEWNETCONF, NLM_F_MULTI,
1889 					      -1) <= 0)
1890 			goto done;
1891 		else
1892 			h++;
1893 	}
1894 done:
1895 	cb->args[0] = h;
1896 	cb->args[1] = idx;
1897 
1898 	return skb->len;
1899 }
1900 
1901 #ifdef CONFIG_SYSCTL
1902 
1903 static void devinet_copy_dflt_conf(struct net *net, int i)
1904 {
1905 	struct net_device *dev;
1906 
1907 	rcu_read_lock();
1908 	for_each_netdev_rcu(net, dev) {
1909 		struct in_device *in_dev;
1910 
1911 		in_dev = __in_dev_get_rcu(dev);
1912 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1913 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1914 	}
1915 	rcu_read_unlock();
1916 }
1917 
1918 /* called with RTNL locked */
1919 static void inet_forward_change(struct net *net)
1920 {
1921 	struct net_device *dev;
1922 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1923 
1924 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1925 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1926 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1927 				    NETCONFA_IFINDEX_ALL,
1928 				    net->ipv4.devconf_all);
1929 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1930 				    NETCONFA_IFINDEX_DEFAULT,
1931 				    net->ipv4.devconf_dflt);
1932 
1933 	for_each_netdev(net, dev) {
1934 		struct in_device *in_dev;
1935 		if (on)
1936 			dev_disable_lro(dev);
1937 		rcu_read_lock();
1938 		in_dev = __in_dev_get_rcu(dev);
1939 		if (in_dev) {
1940 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1941 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1942 						    dev->ifindex, &in_dev->cnf);
1943 		}
1944 		rcu_read_unlock();
1945 	}
1946 }
1947 
1948 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1949 			     void __user *buffer,
1950 			     size_t *lenp, loff_t *ppos)
1951 {
1952 	int old_value = *(int *)ctl->data;
1953 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1954 	int new_value = *(int *)ctl->data;
1955 
1956 	if (write) {
1957 		struct ipv4_devconf *cnf = ctl->extra1;
1958 		struct net *net = ctl->extra2;
1959 		int i = (int *)ctl->data - cnf->data;
1960 
1961 		set_bit(i, cnf->state);
1962 
1963 		if (cnf == net->ipv4.devconf_dflt)
1964 			devinet_copy_dflt_conf(net, i);
1965 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1966 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1967 			if ((new_value == 0) && (old_value != 0))
1968 				rt_cache_flush(net);
1969 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1970 		    new_value != old_value) {
1971 			int ifindex;
1972 
1973 			if (cnf == net->ipv4.devconf_dflt)
1974 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1975 			else if (cnf == net->ipv4.devconf_all)
1976 				ifindex = NETCONFA_IFINDEX_ALL;
1977 			else {
1978 				struct in_device *idev =
1979 					container_of(cnf, struct in_device,
1980 						     cnf);
1981 				ifindex = idev->dev->ifindex;
1982 			}
1983 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1984 						    ifindex, cnf);
1985 		}
1986 	}
1987 
1988 	return ret;
1989 }
1990 
1991 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1992 				  void __user *buffer,
1993 				  size_t *lenp, loff_t *ppos)
1994 {
1995 	int *valp = ctl->data;
1996 	int val = *valp;
1997 	loff_t pos = *ppos;
1998 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1999 
2000 	if (write && *valp != val) {
2001 		struct net *net = ctl->extra2;
2002 
2003 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2004 			if (!rtnl_trylock()) {
2005 				/* Restore the original values before restarting */
2006 				*valp = val;
2007 				*ppos = pos;
2008 				return restart_syscall();
2009 			}
2010 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2011 				inet_forward_change(net);
2012 			} else {
2013 				struct ipv4_devconf *cnf = ctl->extra1;
2014 				struct in_device *idev =
2015 					container_of(cnf, struct in_device, cnf);
2016 				if (*valp)
2017 					dev_disable_lro(idev->dev);
2018 				inet_netconf_notify_devconf(net,
2019 							    NETCONFA_FORWARDING,
2020 							    idev->dev->ifindex,
2021 							    cnf);
2022 			}
2023 			rtnl_unlock();
2024 			rt_cache_flush(net);
2025 		} else
2026 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2027 						    NETCONFA_IFINDEX_DEFAULT,
2028 						    net->ipv4.devconf_dflt);
2029 	}
2030 
2031 	return ret;
2032 }
2033 
2034 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2035 				void __user *buffer,
2036 				size_t *lenp, loff_t *ppos)
2037 {
2038 	int *valp = ctl->data;
2039 	int val = *valp;
2040 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2041 	struct net *net = ctl->extra2;
2042 
2043 	if (write && *valp != val)
2044 		rt_cache_flush(net);
2045 
2046 	return ret;
2047 }
2048 
/* Build one ctl_table initializer for the configuration entry @attr:
 * file name @name, mode @mval and handler @proc.  .data points at the
 * entry's slot in the global ipv4_devconf (IPV4_DEVCONF_<attr> - 1) and
 * .extra1 at ipv4_devconf itself.
 */
2049 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2050 	{ \
2051 		.procname	= name, \
2052 		.data		= ipv4_devconf.data + \
2053 				  IPV4_DEVCONF_ ## attr - 1, \
2054 		.maxlen		= sizeof(int), \
2055 		.mode		= mval, \
2056 		.proc_handler	= proc, \
2057 		.extra1		= &ipv4_devconf, \
2058 	}
2059 
/* Mode 0644 entry handled by devinet_conf_proc(). */
2060 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2061 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2062 
/* Mode 0444 entry handled by devinet_conf_proc(). */
2063 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2064 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2065 
/* Mode 0644 entry with a caller-supplied handler @proc. */
2066 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2067 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2068 
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
2069 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2070 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2071 
/* Template sysctl table for one IPv4 configuration set.  The entries
 * reference the global ipv4_devconf; __devinet_sysctl_register()
 * duplicates this template and rebases .data/.extra1/.extra2 of each
 * entry onto the configuration set being registered.
 */
2072 static struct devinet_sysctl_table {
2073 	struct ctl_table_header *sysctl_header;
2074 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2075 } devinet_sysctl = {
2076 	.devinet_vars = {
2077 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2078 					     devinet_sysctl_forward),
2079 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2080 
2081 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2082 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2083 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2084 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2085 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2086 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2087 					"accept_source_route"),
2088 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2089 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2090 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2091 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2092 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2093 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2094 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2095 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2096 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2097 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2098 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2099 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2100 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2101 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2102 					"force_igmp_version"),
2103 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2104 					"igmpv2_unsolicited_report_interval"),
2105 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2106 					"igmpv3_unsolicited_report_interval"),
2107 
		/* Entries below flush the route cache when their value changes. */
2108 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2109 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2110 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2111 					      "promote_secondaries"),
2112 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2113 					      "route_localnet"),
2114 	},
2115 };
2116 
2117 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2118 					struct ipv4_devconf *p)
2119 {
2120 	int i;
2121 	struct devinet_sysctl_table *t;
2122 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2123 
2124 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2125 	if (!t)
2126 		goto out;
2127 
2128 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2129 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2130 		t->devinet_vars[i].extra1 = p;
2131 		t->devinet_vars[i].extra2 = net;
2132 	}
2133 
2134 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2135 
2136 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2137 	if (!t->sysctl_header)
2138 		goto free;
2139 
2140 	p->sysctl = t;
2141 	return 0;
2142 
2143 free:
2144 	kfree(t);
2145 out:
2146 	return -ENOBUFS;
2147 }
2148 
2149 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2150 {
2151 	struct devinet_sysctl_table *t = cnf->sysctl;
2152 
2153 	if (t == NULL)
2154 		return;
2155 
2156 	cnf->sysctl = NULL;
2157 	unregister_net_sysctl_table(t->sysctl_header);
2158 	kfree(t);
2159 }
2160 
2161 static void devinet_sysctl_register(struct in_device *idev)
2162 {
2163 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2164 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2165 					&idev->cnf);
2166 }
2167 
2168 static void devinet_sysctl_unregister(struct in_device *idev)
2169 {
2170 	__devinet_sysctl_unregister(&idev->cnf);
2171 	neigh_sysctl_unregister(idev->arp_parms);
2172 }
2173 
2174 static struct ctl_table ctl_forward_entry[] = {
2175 	{
2176 		.procname	= "ip_forward",
2177 		.data		= &ipv4_devconf.data[
2178 					IPV4_DEVCONF_FORWARDING - 1],
2179 		.maxlen		= sizeof(int),
2180 		.mode		= 0644,
2181 		.proc_handler	= devinet_sysctl_forward,
2182 		.extra1		= &ipv4_devconf,
2183 		.extra2		= &init_net,
2184 	},
2185 	{ },
2186 };
2187 #endif
2188 
2189 static __net_init int devinet_init_net(struct net *net)
2190 {
2191 	int err;
2192 	struct ipv4_devconf *all, *dflt;
2193 #ifdef CONFIG_SYSCTL
2194 	struct ctl_table *tbl = ctl_forward_entry;
2195 	struct ctl_table_header *forw_hdr;
2196 #endif
2197 
2198 	err = -ENOMEM;
2199 	all = &ipv4_devconf;
2200 	dflt = &ipv4_devconf_dflt;
2201 
2202 	if (!net_eq(net, &init_net)) {
2203 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2204 		if (all == NULL)
2205 			goto err_alloc_all;
2206 
2207 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2208 		if (dflt == NULL)
2209 			goto err_alloc_dflt;
2210 
2211 #ifdef CONFIG_SYSCTL
2212 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2213 		if (tbl == NULL)
2214 			goto err_alloc_ctl;
2215 
2216 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2217 		tbl[0].extra1 = all;
2218 		tbl[0].extra2 = net;
2219 #endif
2220 	}
2221 
2222 #ifdef CONFIG_SYSCTL
2223 	err = __devinet_sysctl_register(net, "all", all);
2224 	if (err < 0)
2225 		goto err_reg_all;
2226 
2227 	err = __devinet_sysctl_register(net, "default", dflt);
2228 	if (err < 0)
2229 		goto err_reg_dflt;
2230 
2231 	err = -ENOMEM;
2232 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2233 	if (forw_hdr == NULL)
2234 		goto err_reg_ctl;
2235 	net->ipv4.forw_hdr = forw_hdr;
2236 #endif
2237 
2238 	net->ipv4.devconf_all = all;
2239 	net->ipv4.devconf_dflt = dflt;
2240 	return 0;
2241 
2242 #ifdef CONFIG_SYSCTL
2243 err_reg_ctl:
2244 	__devinet_sysctl_unregister(dflt);
2245 err_reg_dflt:
2246 	__devinet_sysctl_unregister(all);
2247 err_reg_all:
2248 	if (tbl != ctl_forward_entry)
2249 		kfree(tbl);
2250 err_alloc_ctl:
2251 #endif
2252 	if (dflt != &ipv4_devconf_dflt)
2253 		kfree(dflt);
2254 err_alloc_dflt:
2255 	if (all != &ipv4_devconf)
2256 		kfree(all);
2257 err_alloc_all:
2258 	return err;
2259 }
2260 
2261 static __net_exit void devinet_exit_net(struct net *net)
2262 {
2263 #ifdef CONFIG_SYSCTL
2264 	struct ctl_table *tbl;
2265 
2266 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2267 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2268 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2269 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2270 	kfree(tbl);
2271 #endif
2272 	kfree(net->ipv4.devconf_dflt);
2273 	kfree(net->ipv4.devconf_all);
2274 }
2275 
2276 static __net_initdata struct pernet_operations devinet_ops = {
2277 	.init = devinet_init_net,
2278 	.exit = devinet_exit_net,
2279 };
2280 
2281 static struct rtnl_af_ops inet_af_ops = {
2282 	.family		  = AF_INET,
2283 	.fill_link_af	  = inet_fill_link_af,
2284 	.get_link_af_size = inet_get_link_af_size,
2285 	.validate_link_af = inet_validate_link_af,
2286 	.set_link_af	  = inet_set_link_af,
2287 };
2288 
2289 void __init devinet_init(void)
2290 {
2291 	int i;
2292 
2293 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2294 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2295 
2296 	register_pernet_subsys(&devinet_ops);
2297 
2298 	register_gifconf(PF_INET, inet_gifconf);
2299 	register_netdevice_notifier(&ip_netdev_notifier);
2300 
2301 	schedule_delayed_work(&check_lifetime_work, 0);
2302 
2303 	rtnl_af_register(&inet_af_ops);
2304 
2305 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2306 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2307 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2308 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2309 		      inet_netconf_dump_devconf, NULL);
2310 }
2311 
2312