xref: /openbmc/linux/net/ipv4/devinet.c (revision 80ecbd24)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 	},
77 };
78 
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 	},
87 };
88 
89 #define IPV4_DEVCONF_DFLT(net, attr) \
90 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
91 
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 	[IFA_LOCAL]     	= { .type = NLA_U32 },
94 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
95 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
96 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
98 };
99 
100 #define IN4_ADDR_HSIZE_SHIFT	8
101 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
102 
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fall back to the FIB local table so that communication
160 		 * over loopback subnets works.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
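
/* A minimal usage sketch (the variable names are illustrative assumptions,
 * not from this file): a caller that only needs the device inside an RCU
 * read-side section can pass devref=false and must not touch the pointer
 * after rcu_read_unlock(); with devref=true it owns a reference and must
 * drop it with dev_put().
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		pr_debug("%pI4 is owned by %s\n", &addr, dev->name);
 *	rcu_read_unlock();
 */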
174 
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176 
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179 			 int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191 
192 /* Locks all the inet devices. */
193 
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198 
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202 	if (ifa->ifa_dev)
203 		in_dev_put(ifa->ifa_dev);
204 	kfree(ifa);
205 }
206 
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211 
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214 	struct net_device *dev = idev->dev;
215 
216 	WARN_ON(idev->ifa_list);
217 	WARN_ON(idev->mc_list);
218 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
219 #ifdef NET_REFCNT_DEBUG
220 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222 	dev_put(dev);
223 	if (!idev->dead)
224 		pr_err("Freeing alive in_device %p\n", idev);
225 	else
226 		kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229 
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232 	struct in_device *in_dev;
233 
234 	ASSERT_RTNL();
235 
236 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237 	if (!in_dev)
238 		goto out;
239 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240 			sizeof(in_dev->cnf));
241 	in_dev->cnf.sysctl = NULL;
242 	in_dev->dev = dev;
243 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244 	if (!in_dev->arp_parms)
245 		goto out_kfree;
246 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247 		dev_disable_lro(dev);
248 	/* Reference in_dev->dev */
249 	dev_hold(dev);
250 	/* Account for reference dev->ip_ptr (below) */
251 	in_dev_hold(in_dev);
252 
253 	devinet_sysctl_register(in_dev);
254 	ip_mc_init_dev(in_dev);
255 	if (dev->flags & IFF_UP)
256 		ip_mc_up(in_dev);
257 
258 	/* we can receive as soon as ip_ptr is set -- do this last */
259 	rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261 	return in_dev;
262 out_kfree:
263 	kfree(in_dev);
264 	in_dev = NULL;
265 	goto out;
266 }
267 
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
271 	in_dev_put(idev);
272 }
273 
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276 	struct in_ifaddr *ifa;
277 	struct net_device *dev;
278 
279 	ASSERT_RTNL();
280 
281 	dev = in_dev->dev;
282 
283 	in_dev->dead = 1;
284 
285 	ip_mc_destroy_dev(in_dev);
286 
287 	while ((ifa = in_dev->ifa_list) != NULL) {
288 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289 		inet_free_ifa(ifa);
290 	}
291 
292 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 
294 	devinet_sysctl_unregister(in_dev);
295 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296 	arp_ifdown(dev);
297 
298 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300 
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303 	rcu_read_lock();
304 	for_primary_ifa(in_dev) {
305 		if (inet_ifa_match(a, ifa)) {
306 			if (!b || inet_ifa_match(b, ifa)) {
307 				rcu_read_unlock();
308 				return 1;
309 			}
310 		}
311 	} endfor_ifa(in_dev);
312 	rcu_read_unlock();
313 	return 0;
314 }
315 
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317 			 int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319 	struct in_ifaddr *promote = NULL;
320 	struct in_ifaddr *ifa, *ifa1 = *ifap;
321 	struct in_ifaddr *last_prim = in_dev->ifa_list;
322 	struct in_ifaddr *prev_prom = NULL;
323 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324 
325 	ASSERT_RTNL();
326 
327 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
328 	 * unless alias promotion is set.
329 	 */
330 
331 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333 
334 		while ((ifa = *ifap1) != NULL) {
335 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336 			    ifa1->ifa_scope <= ifa->ifa_scope)
337 				last_prim = ifa;
338 
339 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340 			    ifa1->ifa_mask != ifa->ifa_mask ||
341 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
342 				ifap1 = &ifa->ifa_next;
343 				prev_prom = ifa;
344 				continue;
345 			}
346 
347 			if (!do_promote) {
348 				inet_hash_remove(ifa);
349 				*ifap1 = ifa->ifa_next;
350 
351 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352 				blocking_notifier_call_chain(&inetaddr_chain,
353 						NETDEV_DOWN, ifa);
354 				inet_free_ifa(ifa);
355 			} else {
356 				promote = ifa;
357 				break;
358 			}
359 		}
360 	}
361 
362 	/* On promotion all secondaries from the subnet are changing
363 	 * the primary IP; we must remove all their routes silently
364 	 * and later add them back with the new prefsrc. Do this
365 	 * while all addresses are still on the device list.
366 	 */
367 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368 		if (ifa1->ifa_mask == ifa->ifa_mask &&
369 		    inet_ifa_match(ifa1->ifa_address, ifa))
370 			fib_del_ifaddr(ifa, ifa1);
371 	}
372 
373 	/* 2. Unlink it */
374 
375 	*ifap = ifa1->ifa_next;
376 	inet_hash_remove(ifa1);
377 
378 	/* 3. Announce address deletion */
379 
380 	/* Send message first, then call notifier.
381 	   At first sight, the FIB update triggered by the notifier
382 	   will refer to an already deleted ifaddr, which could confuse
383 	   netlink listeners. It is not true: gated sees that the route
384 	   was deleted and, if it still thinks that the ifaddr is valid,
385 	   it will try to restore the deleted routes... Grr.
386 	   So this order is correct.
387 	 */
388 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390 
391 	if (promote) {
392 		struct in_ifaddr *next_sec = promote->ifa_next;
393 
394 		if (prev_prom) {
395 			prev_prom->ifa_next = promote->ifa_next;
396 			promote->ifa_next = last_prim->ifa_next;
397 			last_prim->ifa_next = promote;
398 		}
399 
400 		promote->ifa_flags &= ~IFA_F_SECONDARY;
401 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402 		blocking_notifier_call_chain(&inetaddr_chain,
403 				NETDEV_UP, promote);
404 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405 			if (ifa1->ifa_mask != ifa->ifa_mask ||
406 			    !inet_ifa_match(ifa1->ifa_address, ifa))
407 					continue;
408 			fib_add_ifaddr(ifa);
409 		}
410 
411 	}
412 	if (destroy)
413 		inet_free_ifa(ifa1);
414 }
415 
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417 			 int destroy)
418 {
419 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421 
422 static void check_lifetime(struct work_struct *work);
423 
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425 
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427 			     u32 portid)
428 {
429 	struct in_device *in_dev = ifa->ifa_dev;
430 	struct in_ifaddr *ifa1, **ifap, **last_primary;
431 
432 	ASSERT_RTNL();
433 
434 	if (!ifa->ifa_local) {
435 		inet_free_ifa(ifa);
436 		return 0;
437 	}
438 
439 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
440 	last_primary = &in_dev->ifa_list;
441 
442 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443 	     ifap = &ifa1->ifa_next) {
444 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445 		    ifa->ifa_scope <= ifa1->ifa_scope)
446 			last_primary = &ifa1->ifa_next;
447 		if (ifa1->ifa_mask == ifa->ifa_mask &&
448 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
449 			if (ifa1->ifa_local == ifa->ifa_local) {
450 				inet_free_ifa(ifa);
451 				return -EEXIST;
452 			}
453 			if (ifa1->ifa_scope != ifa->ifa_scope) {
454 				inet_free_ifa(ifa);
455 				return -EINVAL;
456 			}
457 			ifa->ifa_flags |= IFA_F_SECONDARY;
458 		}
459 	}
460 
461 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462 		net_srandom(ifa->ifa_local);
463 		ifap = last_primary;
464 	}
465 
466 	ifa->ifa_next = *ifap;
467 	*ifap = ifa;
468 
469 	inet_hash_insert(dev_net(in_dev->dev), ifa);
470 
471 	cancel_delayed_work(&check_lifetime_work);
472 	schedule_delayed_work(&check_lifetime_work, 0);
473 
474 	/* Send message first, then call notifier.
475 	   The notifier will trigger a FIB update, so that
476 	   netlink listeners will know about the new ifaddr. */
477 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479 
480 	return 0;
481 }
482 
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485 	return __inet_insert_ifa(ifa, NULL, 0);
486 }
487 
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
491 
492 	ASSERT_RTNL();
493 
494 	if (!in_dev) {
495 		inet_free_ifa(ifa);
496 		return -ENOBUFS;
497 	}
498 	ipv4_devconf_setall(in_dev);
499 	if (ifa->ifa_dev != in_dev) {
500 		WARN_ON(ifa->ifa_dev);
501 		in_dev_hold(in_dev);
502 		ifa->ifa_dev = in_dev;
503 	}
504 	if (ipv4_is_loopback(ifa->ifa_local))
505 		ifa->ifa_scope = RT_SCOPE_HOST;
506 	return inet_insert_ifa(ifa);
507 }
508 
509 /* Caller must hold RCU or RTNL:
510  * we don't take a reference on the found in_device.
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514 	struct net_device *dev;
515 	struct in_device *in_dev = NULL;
516 
517 	rcu_read_lock();
518 	dev = dev_get_by_index_rcu(net, ifindex);
519 	if (dev)
520 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521 	rcu_read_unlock();
522 	return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
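
/* A minimal sketch of the locking contract stated above (the variable
 * names are assumptions): since no reference is taken on the returned
 * in_device, the caller must keep holding RCU (or the RTNL) while using it.
 *
 *	rcu_read_lock();
 *	in_dev = inetdev_by_index(net, ifindex);
 *	if (in_dev)
 *		forwarding = IN_DEV_FORWARD(in_dev);
 *	rcu_read_unlock();
 */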
525 
526 /* Called only from an RTNL-locked context. No locks. */
527 
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529 				    __be32 mask)
530 {
531 	ASSERT_RTNL();
532 
533 	for_primary_ifa(in_dev) {
534 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535 			return ifa;
536 	} endfor_ifa(in_dev);
537 	return NULL;
538 }
539 
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 {
542 	struct net *net = sock_net(skb->sk);
543 	struct nlattr *tb[IFA_MAX+1];
544 	struct in_device *in_dev;
545 	struct ifaddrmsg *ifm;
546 	struct in_ifaddr *ifa, **ifap;
547 	int err = -EINVAL;
548 
549 	ASSERT_RTNL();
550 
551 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552 	if (err < 0)
553 		goto errout;
554 
555 	ifm = nlmsg_data(nlh);
556 	in_dev = inetdev_by_index(net, ifm->ifa_index);
557 	if (in_dev == NULL) {
558 		err = -ENODEV;
559 		goto errout;
560 	}
561 
562 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563 	     ifap = &ifa->ifa_next) {
564 		if (tb[IFA_LOCAL] &&
565 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566 			continue;
567 
568 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569 			continue;
570 
571 		if (tb[IFA_ADDRESS] &&
572 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574 			continue;
575 
576 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577 		return 0;
578 	}
579 
580 	err = -EADDRNOTAVAIL;
581 errout:
582 	return err;
583 }
584 
585 #define INFINITY_LIFE_TIME	0xFFFFFFFF
586 
587 static void check_lifetime(struct work_struct *work)
588 {
589 	unsigned long now, next, next_sec, next_sched;
590 	struct in_ifaddr *ifa;
591 	struct hlist_node *n;
592 	int i;
593 
594 	now = jiffies;
595 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596 
597 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598 		bool change_needed = false;
599 
600 		rcu_read_lock();
601 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
602 			unsigned long age;
603 
604 			if (ifa->ifa_flags & IFA_F_PERMANENT)
605 				continue;
606 
607 			/* We try to batch several events at once. */
608 			age = (now - ifa->ifa_tstamp +
609 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
610 
611 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612 			    age >= ifa->ifa_valid_lft) {
613 				change_needed = true;
614 			} else if (ifa->ifa_preferred_lft ==
615 				   INFINITY_LIFE_TIME) {
616 				continue;
617 			} else if (age >= ifa->ifa_preferred_lft) {
618 				if (time_before(ifa->ifa_tstamp +
619 						ifa->ifa_valid_lft * HZ, next))
620 					next = ifa->ifa_tstamp +
621 					       ifa->ifa_valid_lft * HZ;
622 
623 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624 					change_needed = true;
625 			} else if (time_before(ifa->ifa_tstamp +
626 					       ifa->ifa_preferred_lft * HZ,
627 					       next)) {
628 				next = ifa->ifa_tstamp +
629 				       ifa->ifa_preferred_lft * HZ;
630 			}
631 		}
632 		rcu_read_unlock();
633 		if (!change_needed)
634 			continue;
635 		rtnl_lock();
636 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
637 			unsigned long age;
638 
639 			if (ifa->ifa_flags & IFA_F_PERMANENT)
640 				continue;
641 
642 			/* We try to batch several events at once. */
643 			age = (now - ifa->ifa_tstamp +
644 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645 
646 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647 			    age >= ifa->ifa_valid_lft) {
648 				struct in_ifaddr **ifap;
649 
650 				for (ifap = &ifa->ifa_dev->ifa_list;
651 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
652 					if (*ifap == ifa) {
653 						inet_del_ifa(ifa->ifa_dev,
654 							     ifap, 1);
655 						break;
656 					}
657 				}
658 			} else if (ifa->ifa_preferred_lft !=
659 				   INFINITY_LIFE_TIME &&
660 				   age >= ifa->ifa_preferred_lft &&
661 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662 				ifa->ifa_flags |= IFA_F_DEPRECATED;
663 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
664 			}
665 		}
666 		rtnl_unlock();
667 	}
668 
669 	next_sec = round_jiffies_up(next);
670 	next_sched = next;
671 
672 	/* If rounded timeout is accurate enough, accept it. */
673 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674 		next_sched = next_sec;
675 
676 	now = jiffies;
677 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
680 
681 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
682 }
683 
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
685 			     __u32 prefered_lft)
686 {
687 	unsigned long timeout;
688 
689 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690 
691 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
692 	if (addrconf_finite_timeout(timeout))
693 		ifa->ifa_valid_lft = timeout;
694 	else
695 		ifa->ifa_flags |= IFA_F_PERMANENT;
696 
697 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698 	if (addrconf_finite_timeout(timeout)) {
699 		if (timeout == 0)
700 			ifa->ifa_flags |= IFA_F_DEPRECATED;
701 		ifa->ifa_preferred_lft = timeout;
702 	}
703 	ifa->ifa_tstamp = jiffies;
704 	if (!ifa->ifa_cstamp)
705 		ifa->ifa_cstamp = ifa->ifa_tstamp;
706 }
707 
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
710 {
711 	struct nlattr *tb[IFA_MAX+1];
712 	struct in_ifaddr *ifa;
713 	struct ifaddrmsg *ifm;
714 	struct net_device *dev;
715 	struct in_device *in_dev;
716 	int err;
717 
718 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
719 	if (err < 0)
720 		goto errout;
721 
722 	ifm = nlmsg_data(nlh);
723 	err = -EINVAL;
724 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
725 		goto errout;
726 
727 	dev = __dev_get_by_index(net, ifm->ifa_index);
728 	err = -ENODEV;
729 	if (dev == NULL)
730 		goto errout;
731 
732 	in_dev = __in_dev_get_rtnl(dev);
733 	err = -ENOBUFS;
734 	if (in_dev == NULL)
735 		goto errout;
736 
737 	ifa = inet_alloc_ifa();
738 	if (ifa == NULL)
739 		/*
740 		 * A potential in_dev allocation can be left alive; it stays
741 		 * assigned to its device and is destroyed with it.
742 		 */
743 		goto errout;
744 
745 	ipv4_devconf_setall(in_dev);
746 	in_dev_hold(in_dev);
747 
748 	if (tb[IFA_ADDRESS] == NULL)
749 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750 
751 	INIT_HLIST_NODE(&ifa->hash);
752 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754 	ifa->ifa_flags = ifm->ifa_flags;
755 	ifa->ifa_scope = ifm->ifa_scope;
756 	ifa->ifa_dev = in_dev;
757 
758 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760 
761 	if (tb[IFA_BROADCAST])
762 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
763 
764 	if (tb[IFA_LABEL])
765 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766 	else
767 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768 
769 	if (tb[IFA_CACHEINFO]) {
770 		struct ifa_cacheinfo *ci;
771 
772 		ci = nla_data(tb[IFA_CACHEINFO]);
773 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
774 			err = -EINVAL;
775 			goto errout_free;
776 		}
777 		*pvalid_lft = ci->ifa_valid;
778 		*pprefered_lft = ci->ifa_prefered;
779 	}
780 
781 	return ifa;
782 
783 errout_free:
784 	inet_free_ifa(ifa);
785 errout:
786 	return ERR_PTR(err);
787 }
788 
789 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
790 {
791 	struct in_device *in_dev = ifa->ifa_dev;
792 	struct in_ifaddr *ifa1, **ifap;
793 
794 	if (!ifa->ifa_local)
795 		return NULL;
796 
797 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
798 	     ifap = &ifa1->ifa_next) {
799 		if (ifa1->ifa_mask == ifa->ifa_mask &&
800 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
801 		    ifa1->ifa_local == ifa->ifa_local)
802 			return ifa1;
803 	}
804 	return NULL;
805 }
806 
807 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
808 {
809 	struct net *net = sock_net(skb->sk);
810 	struct in_ifaddr *ifa;
811 	struct in_ifaddr *ifa_existing;
812 	__u32 valid_lft = INFINITY_LIFE_TIME;
813 	__u32 prefered_lft = INFINITY_LIFE_TIME;
814 
815 	ASSERT_RTNL();
816 
817 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
818 	if (IS_ERR(ifa))
819 		return PTR_ERR(ifa);
820 
821 	ifa_existing = find_matching_ifa(ifa);
822 	if (!ifa_existing) {
823 		/* It would be best to check for !NLM_F_CREATE here but
824 		 * userspace already relies on not having to provide this.
825 		 */
826 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
827 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
828 	} else {
829 		inet_free_ifa(ifa);
830 
831 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
832 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
833 			return -EEXIST;
834 		ifa = ifa_existing;
835 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
836 		cancel_delayed_work(&check_lifetime_work);
837 		schedule_delayed_work(&check_lifetime_work, 0);
838 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
839 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
840 	}
841 	return 0;
842 }
843 
844 /*
845  *	Determine a default network mask, based on the IP address.
846  */
847 
848 static int inet_abc_len(__be32 addr)
849 {
850 	int rc = -1;	/* Something else, probably a multicast. */
851 
852 	if (ipv4_is_zeronet(addr))
853 		rc = 0;
854 	else {
855 		__u32 haddr = ntohl(addr);
856 
857 		if (IN_CLASSA(haddr))
858 			rc = 8;
859 		else if (IN_CLASSB(haddr))
860 			rc = 16;
861 		else if (IN_CLASSC(haddr))
862 			rc = 24;
863 	}
864 
865 	return rc;
866 }
867 
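/* Worked example of the classful defaults above (illustration only):
 *	10.1.2.3    is IN_CLASSA  -> 8  (mask 255.0.0.0)
 *	172.16.5.4  is IN_CLASSB  -> 16 (mask 255.255.0.0)
 *	192.168.1.1 is IN_CLASSC  -> 24 (mask 255.255.255.0)
 *	224.0.0.1   is multicast  -> -1 (no sensible default mask)
 *	0.0.0.0     is zeronet    -> 0
 */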
868 
869 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
870 {
871 	struct ifreq ifr;
872 	struct sockaddr_in sin_orig;
873 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
874 	struct in_device *in_dev;
875 	struct in_ifaddr **ifap = NULL;
876 	struct in_ifaddr *ifa = NULL;
877 	struct net_device *dev;
878 	char *colon;
879 	int ret = -EFAULT;
880 	int tryaddrmatch = 0;
881 
882 	/*
883 	 *	Fetch the caller's info block into kernel space
884 	 */
885 
886 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
887 		goto out;
888 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
889 
890 	/* save original address for comparison */
891 	memcpy(&sin_orig, sin, sizeof(*sin));
892 
893 	colon = strchr(ifr.ifr_name, ':');
894 	if (colon)
895 		*colon = 0;
896 
897 	dev_load(net, ifr.ifr_name);
898 
899 	switch (cmd) {
900 	case SIOCGIFADDR:	/* Get interface address */
901 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
902 	case SIOCGIFDSTADDR:	/* Get the destination address */
903 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
904 		/* Note that these ioctls will not sleep,
905 		   so that we do not impose a lock.
906 		   One day we will be forced to put shlock here (I mean SMP)
907 		 */
908 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
909 		memset(sin, 0, sizeof(*sin));
910 		sin->sin_family = AF_INET;
911 		break;
912 
913 	case SIOCSIFFLAGS:
914 		ret = -EPERM;
915 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
916 			goto out;
917 		break;
918 	case SIOCSIFADDR:	/* Set interface address (and family) */
919 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
920 	case SIOCSIFDSTADDR:	/* Set the destination address */
921 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
922 		ret = -EPERM;
923 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
924 			goto out;
925 		ret = -EINVAL;
926 		if (sin->sin_family != AF_INET)
927 			goto out;
928 		break;
929 	default:
930 		ret = -EINVAL;
931 		goto out;
932 	}
933 
934 	rtnl_lock();
935 
936 	ret = -ENODEV;
937 	dev = __dev_get_by_name(net, ifr.ifr_name);
938 	if (!dev)
939 		goto done;
940 
941 	if (colon)
942 		*colon = ':';
943 
944 	in_dev = __in_dev_get_rtnl(dev);
945 	if (in_dev) {
946 		if (tryaddrmatch) {
947 			/* Matthias Andree */
948 			/* compare label and address (4.4BSD style) */
949 			/* note: we only do this for a limited set of ioctls
950 			   and only if the original address family was AF_INET.
951 			   This is checked above. */
952 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
953 			     ifap = &ifa->ifa_next) {
954 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
955 				    sin_orig.sin_addr.s_addr ==
956 							ifa->ifa_local) {
957 					break; /* found */
958 				}
959 			}
960 		}
961 		/* We didn't get a match; maybe the application is
962 		   4.3BSD-style and passed in junk, so we fall back to
963 		   comparing just the label. */
964 		if (!ifa) {
965 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
966 			     ifap = &ifa->ifa_next)
967 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
968 					break;
969 		}
970 	}
971 
972 	ret = -EADDRNOTAVAIL;
973 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
974 		goto done;
975 
976 	switch (cmd) {
977 	case SIOCGIFADDR:	/* Get interface address */
978 		sin->sin_addr.s_addr = ifa->ifa_local;
979 		goto rarok;
980 
981 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
982 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
983 		goto rarok;
984 
985 	case SIOCGIFDSTADDR:	/* Get the destination address */
986 		sin->sin_addr.s_addr = ifa->ifa_address;
987 		goto rarok;
988 
989 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
990 		sin->sin_addr.s_addr = ifa->ifa_mask;
991 		goto rarok;
992 
993 	case SIOCSIFFLAGS:
994 		if (colon) {
995 			ret = -EADDRNOTAVAIL;
996 			if (!ifa)
997 				break;
998 			ret = 0;
999 			if (!(ifr.ifr_flags & IFF_UP))
1000 				inet_del_ifa(in_dev, ifap, 1);
1001 			break;
1002 		}
1003 		ret = dev_change_flags(dev, ifr.ifr_flags);
1004 		break;
1005 
1006 	case SIOCSIFADDR:	/* Set interface address (and family) */
1007 		ret = -EINVAL;
1008 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1009 			break;
1010 
1011 		if (!ifa) {
1012 			ret = -ENOBUFS;
1013 			ifa = inet_alloc_ifa();
1014 			if (!ifa)
1015 				break;
1016 			INIT_HLIST_NODE(&ifa->hash);
1017 			if (colon)
1018 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1019 			else
1020 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1021 		} else {
1022 			ret = 0;
1023 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1024 				break;
1025 			inet_del_ifa(in_dev, ifap, 0);
1026 			ifa->ifa_broadcast = 0;
1027 			ifa->ifa_scope = 0;
1028 		}
1029 
1030 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1031 
1032 		if (!(dev->flags & IFF_POINTOPOINT)) {
1033 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1034 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1035 			if ((dev->flags & IFF_BROADCAST) &&
1036 			    ifa->ifa_prefixlen < 31)
1037 				ifa->ifa_broadcast = ifa->ifa_address |
1038 						     ~ifa->ifa_mask;
1039 		} else {
1040 			ifa->ifa_prefixlen = 32;
1041 			ifa->ifa_mask = inet_make_mask(32);
1042 		}
1043 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1044 		ret = inet_set_ifa(dev, ifa);
1045 		break;
1046 
1047 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1048 		ret = 0;
1049 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1050 			inet_del_ifa(in_dev, ifap, 0);
1051 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1052 			inet_insert_ifa(ifa);
1053 		}
1054 		break;
1055 
1056 	case SIOCSIFDSTADDR:	/* Set the destination address */
1057 		ret = 0;
1058 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1059 			break;
1060 		ret = -EINVAL;
1061 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1062 			break;
1063 		ret = 0;
1064 		inet_del_ifa(in_dev, ifap, 0);
1065 		ifa->ifa_address = sin->sin_addr.s_addr;
1066 		inet_insert_ifa(ifa);
1067 		break;
1068 
1069 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1070 
1071 		/*
1072 		 *	The mask we set must be legal.
1073 		 */
1074 		ret = -EINVAL;
1075 		if (bad_mask(sin->sin_addr.s_addr, 0))
1076 			break;
1077 		ret = 0;
1078 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1079 			__be32 old_mask = ifa->ifa_mask;
1080 			inet_del_ifa(in_dev, ifap, 0);
1081 			ifa->ifa_mask = sin->sin_addr.s_addr;
1082 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1083 
1084 			/* See if the current broadcast address matches
1085 			 * the old netmask; if so, recalculate
1086 			 * the broadcast address. Otherwise it's a
1087 			 * funny address, so don't touch it since
1088 			 * the user seems to know what (s)he's doing...
1089 			 */
1090 			if ((dev->flags & IFF_BROADCAST) &&
1091 			    (ifa->ifa_prefixlen < 31) &&
1092 			    (ifa->ifa_broadcast ==
1093 			     (ifa->ifa_local|~old_mask))) {
1094 				ifa->ifa_broadcast = (ifa->ifa_local |
1095 						      ~sin->sin_addr.s_addr);
1096 			}
1097 			inet_insert_ifa(ifa);
1098 		}
1099 		break;
1100 	}
1101 done:
1102 	rtnl_unlock();
1103 out:
1104 	return ret;
1105 rarok:
1106 	rtnl_unlock();
1107 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1108 	goto out;
1109 }
1110 
1111 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1112 {
1113 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1114 	struct in_ifaddr *ifa;
1115 	struct ifreq ifr;
1116 	int done = 0;
1117 
1118 	if (!in_dev)
1119 		goto out;
1120 
1121 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1122 		if (!buf) {
1123 			done += sizeof(ifr);
1124 			continue;
1125 		}
1126 		if (len < (int) sizeof(ifr))
1127 			break;
1128 		memset(&ifr, 0, sizeof(struct ifreq));
1129 		if (ifa->ifa_label)
1130 			strcpy(ifr.ifr_name, ifa->ifa_label);
1131 		else
1132 			strcpy(ifr.ifr_name, dev->name);
1133 
1134 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1135 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1136 								ifa->ifa_local;
1137 
1138 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1139 			done = -EFAULT;
1140 			break;
1141 		}
1142 		buf  += sizeof(struct ifreq);
1143 		len  -= sizeof(struct ifreq);
1144 		done += sizeof(struct ifreq);
1145 	}
1146 out:
1147 	return done;
1148 }
1149 
1150 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1151 {
1152 	__be32 addr = 0;
1153 	struct in_device *in_dev;
1154 	struct net *net = dev_net(dev);
1155 
1156 	rcu_read_lock();
1157 	in_dev = __in_dev_get_rcu(dev);
1158 	if (!in_dev)
1159 		goto no_in_dev;
1160 
1161 	for_primary_ifa(in_dev) {
1162 		if (ifa->ifa_scope > scope)
1163 			continue;
1164 		if (!dst || inet_ifa_match(dst, ifa)) {
1165 			addr = ifa->ifa_local;
1166 			break;
1167 		}
1168 		if (!addr)
1169 			addr = ifa->ifa_local;
1170 	} endfor_ifa(in_dev);
1171 
1172 	if (addr)
1173 		goto out_unlock;
1174 no_in_dev:
1175 
1176 	/* Non-loopback addresses on loopback should be preferred
1177 	   in this case. It is important that lo is the first interface
1178 	   in the dev_base list.
1179 	 */
1180 	for_each_netdev_rcu(net, dev) {
1181 		in_dev = __in_dev_get_rcu(dev);
1182 		if (!in_dev)
1183 			continue;
1184 
1185 		for_primary_ifa(in_dev) {
1186 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1187 			    ifa->ifa_scope <= scope) {
1188 				addr = ifa->ifa_local;
1189 				goto out_unlock;
1190 			}
1191 		} endfor_ifa(in_dev);
1192 	}
1193 out_unlock:
1194 	rcu_read_unlock();
1195 	return addr;
1196 }
1197 EXPORT_SYMBOL(inet_select_addr);
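
/* A minimal usage sketch (dev and dst are assumed to come from the
 * caller's context): pick a source address on dev suitable for talking
 * to dst, accepting any scope up to universe.
 *
 *	__be32 saddr = inet_select_addr(dev, dst, RT_SCOPE_UNIVERSE);
 *
 * A zero return means no suitable local address was found on any device.
 */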
1198 
1199 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1200 			      __be32 local, int scope)
1201 {
1202 	int same = 0;
1203 	__be32 addr = 0;
1204 
1205 	for_ifa(in_dev) {
1206 		if (!addr &&
1207 		    (local == ifa->ifa_local || !local) &&
1208 		    ifa->ifa_scope <= scope) {
1209 			addr = ifa->ifa_local;
1210 			if (same)
1211 				break;
1212 		}
1213 		if (!same) {
1214 			same = (!local || inet_ifa_match(local, ifa)) &&
1215 				(!dst || inet_ifa_match(dst, ifa));
1216 			if (same && addr) {
1217 				if (local || !dst)
1218 					break;
1219 				/* Is the selected addr in the dst subnet? */
1220 				if (inet_ifa_match(addr, ifa))
1221 					break;
1222 				/* No, then can we use new local src? */
1223 				if (ifa->ifa_scope <= scope) {
1224 					addr = ifa->ifa_local;
1225 					break;
1226 				}
1227 				/* keep searching for a dst subnet that contains addr */
1228 				same = 0;
1229 			}
1230 		}
1231 	} endfor_ifa(in_dev);
1232 
1233 	return same ? addr : 0;
1234 }
1235 
1236 /*
1237  * Confirm that local IP address exists using wildcards:
1238  * - in_dev: only on this interface, 0=any interface
1239  * - dst: only in the same subnet as dst, 0=any dst
1240  * - local: address, 0=autoselect the local address
1241  * - scope: maximum allowed scope value for the local address
1242  */
1243 __be32 inet_confirm_addr(struct in_device *in_dev,
1244 			 __be32 dst, __be32 local, int scope)
1245 {
1246 	__be32 addr = 0;
1247 	struct net_device *dev;
1248 	struct net *net;
1249 
1250 	if (scope != RT_SCOPE_LINK)
1251 		return confirm_addr_indev(in_dev, dst, local, scope);
1252 
1253 	net = dev_net(in_dev->dev);
1254 	rcu_read_lock();
1255 	for_each_netdev_rcu(net, dev) {
1256 		in_dev = __in_dev_get_rcu(dev);
1257 		if (in_dev) {
1258 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1259 			if (addr)
1260 				break;
1261 		}
1262 	}
1263 	rcu_read_unlock();
1264 
1265 	return addr;
1266 }
1267 EXPORT_SYMBOL(inet_confirm_addr);
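
/* A minimal sketch of the wildcard semantics documented above: confirm
 * that some local address in dst's subnet is configured, without pinning
 * a specific local address (local = 0), allowing scopes up to RT_SCOPE_HOST:
 *
 *	confirmed = inet_confirm_addr(in_dev, dst, 0, RT_SCOPE_HOST);
 *
 * A zero return means no matching address exists.
 */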
1268 
1269 /*
1270  *	Device notifier
1271  */
1272 
1273 int register_inetaddr_notifier(struct notifier_block *nb)
1274 {
1275 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1276 }
1277 EXPORT_SYMBOL(register_inetaddr_notifier);
1278 
1279 int unregister_inetaddr_notifier(struct notifier_block *nb)
1280 {
1281 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1282 }
1283 EXPORT_SYMBOL(unregister_inetaddr_notifier);
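
/* A minimal sketch of a consumer of this chain (the callback and block
 * names are illustrative assumptions): the chain hands the affected
 * struct in_ifaddr to the callback, with NETDEV_UP/NETDEV_DOWN events as
 * raised from __inet_insert_ifa()/__inet_del_ifa().
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_debug("address added on %s\n", ifa->ifa_label);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_inetaddr_nb = {
 *		.notifier_call	= my_inetaddr_event,
 *	};
 *
 *	register_inetaddr_notifier(&my_inetaddr_nb);
 */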
1284 
1285 /* Rename ifa_labels for a device name change. Make some effort to preserve
1286  * existing alias numbering and to create unique labels if possible.
1287 */
1288 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1289 {
1290 	struct in_ifaddr *ifa;
1291 	int named = 0;
1292 
1293 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1294 		char old[IFNAMSIZ], *dot;
1295 
1296 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1297 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1298 		if (named++ == 0)
1299 			goto skip;
1300 		dot = strchr(old, ':');
1301 		if (dot == NULL) {
1302 			sprintf(old, ":%d", named);
1303 			dot = old;
1304 		}
1305 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1306 			strcat(ifa->ifa_label, dot);
1307 		else
1308 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1309 skip:
1310 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1311 	}
1312 }
1313 
1314 static bool inetdev_valid_mtu(unsigned int mtu)
1315 {
1316 	return mtu >= 68;	/* 68 is the minimum IPv4 MTU (RFC 791) */
1317 }
1318 
1319 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1320 					struct in_device *in_dev)
1321 
1322 {
1323 	struct in_ifaddr *ifa;
1324 
1325 	for (ifa = in_dev->ifa_list; ifa;
1326 	     ifa = ifa->ifa_next) {
1327 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1328 			 ifa->ifa_local, dev,
1329 			 ifa->ifa_local, NULL,
1330 			 dev->dev_addr, NULL);
1331 	}
1332 }
1333 
1334 /* Called only under RTNL semaphore */
1335 
1336 static int inetdev_event(struct notifier_block *this, unsigned long event,
1337 			 void *ptr)
1338 {
1339 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1340 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1341 
1342 	ASSERT_RTNL();
1343 
1344 	if (!in_dev) {
1345 		if (event == NETDEV_REGISTER) {
1346 			in_dev = inetdev_init(dev);
1347 			if (!in_dev)
1348 				return notifier_from_errno(-ENOMEM);
1349 			if (dev->flags & IFF_LOOPBACK) {
1350 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1351 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1352 			}
1353 		} else if (event == NETDEV_CHANGEMTU) {
1354 			/* Re-enabling IP */
1355 			if (inetdev_valid_mtu(dev->mtu))
1356 				in_dev = inetdev_init(dev);
1357 		}
1358 		goto out;
1359 	}
1360 
1361 	switch (event) {
1362 	case NETDEV_REGISTER:
1363 		pr_debug("%s: bug\n", __func__);
1364 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1365 		break;
1366 	case NETDEV_UP:
1367 		if (!inetdev_valid_mtu(dev->mtu))
1368 			break;
1369 		if (dev->flags & IFF_LOOPBACK) {
1370 			struct in_ifaddr *ifa = inet_alloc_ifa();
1371 
1372 			if (ifa) {
1373 				INIT_HLIST_NODE(&ifa->hash);
1374 				ifa->ifa_local =
1375 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1376 				ifa->ifa_prefixlen = 8;
1377 				ifa->ifa_mask = inet_make_mask(8);
1378 				in_dev_hold(in_dev);
1379 				ifa->ifa_dev = in_dev;
1380 				ifa->ifa_scope = RT_SCOPE_HOST;
1381 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1382 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1383 						 INFINITY_LIFE_TIME);
1384 				inet_insert_ifa(ifa);
1385 			}
1386 		}
1387 		ip_mc_up(in_dev);
1388 		/* fall through */
1389 	case NETDEV_CHANGEADDR:
1390 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1391 			break;
1392 		/* fall through */
1393 	case NETDEV_NOTIFY_PEERS:
1394 		/* Send gratuitous ARP to notify of link change */
1395 		inetdev_send_gratuitous_arp(dev, in_dev);
1396 		break;
1397 	case NETDEV_DOWN:
1398 		ip_mc_down(in_dev);
1399 		break;
1400 	case NETDEV_PRE_TYPE_CHANGE:
1401 		ip_mc_unmap(in_dev);
1402 		break;
1403 	case NETDEV_POST_TYPE_CHANGE:
1404 		ip_mc_remap(in_dev);
1405 		break;
1406 	case NETDEV_CHANGEMTU:
1407 		if (inetdev_valid_mtu(dev->mtu))
1408 			break;
1409 		/* disable IP when MTU is not enough */
1410 	case NETDEV_UNREGISTER:
1411 		inetdev_destroy(in_dev);
1412 		break;
1413 	case NETDEV_CHANGENAME:
1414 		/* Do not notify about the label change; this event is
1415 		 * not interesting to applications using netlink.
1416 		 */
1417 		inetdev_changename(dev, in_dev);
1418 
1419 		devinet_sysctl_unregister(in_dev);
1420 		devinet_sysctl_register(in_dev);
1421 		break;
1422 	}
1423 out:
1424 	return NOTIFY_DONE;
1425 }
1426 
1427 static struct notifier_block ip_netdev_notifier = {
1428 	.notifier_call = inetdev_event,
1429 };
1430 
1431 static size_t inet_nlmsg_size(void)
1432 {
1433 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1434 	       + nla_total_size(4) /* IFA_ADDRESS */
1435 	       + nla_total_size(4) /* IFA_LOCAL */
1436 	       + nla_total_size(4) /* IFA_BROADCAST */
1437 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1438 }
1439 
1440 static inline u32 cstamp_delta(unsigned long cstamp)
1441 {
1442 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1443 }
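
/* A small worked example (assuming HZ == 100): a timestamp taken 250
 * jiffies after INITIAL_JIFFIES yields 250 * 100 / 100 = 250, i.e. the
 * ifa_cacheinfo cstamp/tstamp values are reported in hundredths of a
 * second since boot.
 */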
1444 
1445 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1446 			 unsigned long tstamp, u32 preferred, u32 valid)
1447 {
1448 	struct ifa_cacheinfo ci;
1449 
1450 	ci.cstamp = cstamp_delta(cstamp);
1451 	ci.tstamp = cstamp_delta(tstamp);
1452 	ci.ifa_prefered = preferred;
1453 	ci.ifa_valid = valid;
1454 
1455 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1456 }
1457 
1458 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1459 			    u32 portid, u32 seq, int event, unsigned int flags)
1460 {
1461 	struct ifaddrmsg *ifm;
1462 	struct nlmsghdr  *nlh;
1463 	u32 preferred, valid;
1464 
1465 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1466 	if (nlh == NULL)
1467 		return -EMSGSIZE;
1468 
1469 	ifm = nlmsg_data(nlh);
1470 	ifm->ifa_family = AF_INET;
1471 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1472 	ifm->ifa_flags = ifa->ifa_flags;
1473 	ifm->ifa_scope = ifa->ifa_scope;
1474 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1475 
1476 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1477 		preferred = ifa->ifa_preferred_lft;
1478 		valid = ifa->ifa_valid_lft;
1479 		if (preferred != INFINITY_LIFE_TIME) {
1480 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1481 
1482 			if (preferred > tval)
1483 				preferred -= tval;
1484 			else
1485 				preferred = 0;
1486 			if (valid != INFINITY_LIFE_TIME) {
1487 				if (valid > tval)
1488 					valid -= tval;
1489 				else
1490 					valid = 0;
1491 			}
1492 		}
1493 	} else {
1494 		preferred = INFINITY_LIFE_TIME;
1495 		valid = INFINITY_LIFE_TIME;
1496 	}
1497 	if ((ifa->ifa_address &&
1498 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1499 	    (ifa->ifa_local &&
1500 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1501 	    (ifa->ifa_broadcast &&
1502 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1503 	    (ifa->ifa_label[0] &&
1504 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1505 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1506 			  preferred, valid))
1507 		goto nla_put_failure;
1508 
1509 	return nlmsg_end(skb, nlh);
1510 
1511 nla_put_failure:
1512 	nlmsg_cancel(skb, nlh);
1513 	return -EMSGSIZE;
1514 }
1515 
1516 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1517 {
1518 	struct net *net = sock_net(skb->sk);
1519 	int h, s_h;
1520 	int idx, s_idx;
1521 	int ip_idx, s_ip_idx;
1522 	struct net_device *dev;
1523 	struct in_device *in_dev;
1524 	struct in_ifaddr *ifa;
1525 	struct hlist_head *head;
1526 
1527 	s_h = cb->args[0];
1528 	s_idx = idx = cb->args[1];
1529 	s_ip_idx = ip_idx = cb->args[2];
1530 
1531 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1532 		idx = 0;
1533 		head = &net->dev_index_head[h];
1534 		rcu_read_lock();
1535 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1536 			  net->dev_base_seq;
1537 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1538 			if (idx < s_idx)
1539 				goto cont;
1540 			if (h > s_h || idx > s_idx)
1541 				s_ip_idx = 0;
1542 			in_dev = __in_dev_get_rcu(dev);
1543 			if (!in_dev)
1544 				goto cont;
1545 
1546 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1547 			     ifa = ifa->ifa_next, ip_idx++) {
1548 				if (ip_idx < s_ip_idx)
1549 					continue;
1550 				if (inet_fill_ifaddr(skb, ifa,
1551 					     NETLINK_CB(cb->skb).portid,
1552 					     cb->nlh->nlmsg_seq,
1553 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1554 					rcu_read_unlock();
1555 					goto done;
1556 				}
1557 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1558 			}
1559 cont:
1560 			idx++;
1561 		}
1562 		rcu_read_unlock();
1563 	}
1564 
1565 done:
1566 	cb->args[0] = h;
1567 	cb->args[1] = idx;
1568 	cb->args[2] = ip_idx;
1569 
1570 	return skb->len;
1571 }
1572 
1573 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1574 		      u32 portid)
1575 {
1576 	struct sk_buff *skb;
1577 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1578 	int err = -ENOBUFS;
1579 	struct net *net;
1580 
1581 	net = dev_net(ifa->ifa_dev->dev);
1582 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1583 	if (skb == NULL)
1584 		goto errout;
1585 
1586 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1587 	if (err < 0) {
1588 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1589 		WARN_ON(err == -EMSGSIZE);
1590 		kfree_skb(skb);
1591 		goto errout;
1592 	}
1593 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1594 	return;
1595 errout:
1596 	if (err < 0)
1597 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1598 }
1599 
1600 static size_t inet_get_link_af_size(const struct net_device *dev)
1601 {
1602 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1603 
1604 	if (!in_dev)
1605 		return 0;
1606 
1607 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1608 }
1609 
1610 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1611 {
1612 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1613 	struct nlattr *nla;
1614 	int i;
1615 
1616 	if (!in_dev)
1617 		return -ENODATA;
1618 
1619 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1620 	if (nla == NULL)
1621 		return -EMSGSIZE;
1622 
1623 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1624 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1625 
1626 	return 0;
1627 }
1628 
1629 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1630 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1631 };
1632 
1633 static int inet_validate_link_af(const struct net_device *dev,
1634 				 const struct nlattr *nla)
1635 {
1636 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1637 	int err, rem;
1638 
1639 	if (dev && !__in_dev_get_rtnl(dev))
1640 		return -EAFNOSUPPORT;
1641 
1642 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1643 	if (err < 0)
1644 		return err;
1645 
1646 	if (tb[IFLA_INET_CONF]) {
1647 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1648 			int cfgid = nla_type(a);
1649 
1650 			if (nla_len(a) < 4)
1651 				return -EINVAL;
1652 
1653 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1654 				return -EINVAL;
1655 		}
1656 	}
1657 
1658 	return 0;
1659 }
1660 
1661 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1662 {
1663 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1664 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1665 	int rem;
1666 
1667 	if (!in_dev)
1668 		return -EAFNOSUPPORT;
1669 
1670 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1671 		BUG();
1672 
1673 	if (tb[IFLA_INET_CONF]) {
1674 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1675 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1676 	}
1677 
1678 	return 0;
1679 }
1680 
1681 static int inet_netconf_msgsize_devconf(int type)
1682 {
1683 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1684 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1685 
1686 	/* type -1 is used for ALL */
1687 	if (type == -1 || type == NETCONFA_FORWARDING)
1688 		size += nla_total_size(4);
1689 	if (type == -1 || type == NETCONFA_RP_FILTER)
1690 		size += nla_total_size(4);
1691 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1692 		size += nla_total_size(4);
1693 
1694 	return size;
1695 }
1696 
1697 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1698 				     struct ipv4_devconf *devconf, u32 portid,
1699 				     u32 seq, int event, unsigned int flags,
1700 				     int type)
1701 {
1702 	struct nlmsghdr  *nlh;
1703 	struct netconfmsg *ncm;
1704 
1705 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1706 			flags);
1707 	if (nlh == NULL)
1708 		return -EMSGSIZE;
1709 
1710 	ncm = nlmsg_data(nlh);
1711 	ncm->ncm_family = AF_INET;
1712 
1713 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1714 		goto nla_put_failure;
1715 
1716 	/* type -1 is used for ALL */
1717 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1718 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1719 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1720 		goto nla_put_failure;
1721 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1722 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1723 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1724 		goto nla_put_failure;
1725 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1726 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1727 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1728 		goto nla_put_failure;
1729 
1730 	return nlmsg_end(skb, nlh);
1731 
1732 nla_put_failure:
1733 	nlmsg_cancel(skb, nlh);
1734 	return -EMSGSIZE;
1735 }
1736 
1737 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1738 				 struct ipv4_devconf *devconf)
1739 {
1740 	struct sk_buff *skb;
1741 	int err = -ENOBUFS;
1742 
1743 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1744 	if (skb == NULL)
1745 		goto errout;
1746 
1747 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1748 					RTM_NEWNETCONF, 0, type);
1749 	if (err < 0) {
1750 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1751 		WARN_ON(err == -EMSGSIZE);
1752 		kfree_skb(skb);
1753 		goto errout;
1754 	}
1755 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1756 	return;
1757 errout:
1758 	if (err < 0)
1759 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1760 }
1761 
1762 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1763 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1764 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1765 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1766 };
1767 
1768 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1769 				    struct nlmsghdr *nlh)
1770 {
1771 	struct net *net = sock_net(in_skb->sk);
1772 	struct nlattr *tb[NETCONFA_MAX+1];
1773 	struct netconfmsg *ncm;
1774 	struct sk_buff *skb;
1775 	struct ipv4_devconf *devconf;
1776 	struct in_device *in_dev;
1777 	struct net_device *dev;
1778 	int ifindex;
1779 	int err;
1780 
1781 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1782 			  devconf_ipv4_policy);
1783 	if (err < 0)
1784 		goto errout;
1785 
1786 	err = EINVAL;
1787 	if (!tb[NETCONFA_IFINDEX])
1788 		goto errout;
1789 
1790 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1791 	switch (ifindex) {
1792 	case NETCONFA_IFINDEX_ALL:
1793 		devconf = net->ipv4.devconf_all;
1794 		break;
1795 	case NETCONFA_IFINDEX_DEFAULT:
1796 		devconf = net->ipv4.devconf_dflt;
1797 		break;
1798 	default:
1799 		dev = __dev_get_by_index(net, ifindex);
1800 		if (dev == NULL)
1801 			goto errout;
1802 		in_dev = __in_dev_get_rtnl(dev);
1803 		if (in_dev == NULL)
1804 			goto errout;
1805 		devconf = &in_dev->cnf;
1806 		break;
1807 	}
1808 
1809 	err = -ENOBUFS;
1810 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1811 	if (skb == NULL)
1812 		goto errout;
1813 
1814 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1815 					NETLINK_CB(in_skb).portid,
1816 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1817 					-1);
1818 	if (err < 0) {
1819 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1820 		WARN_ON(err == -EMSGSIZE);
1821 		kfree_skb(skb);
1822 		goto errout;
1823 	}
1824 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1825 errout:
1826 	return err;
1827 }
1828 
1829 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1830 				     struct netlink_callback *cb)
1831 {
1832 	struct net *net = sock_net(skb->sk);
1833 	int h, s_h;
1834 	int idx, s_idx;
1835 	struct net_device *dev;
1836 	struct in_device *in_dev;
1837 	struct hlist_head *head;
1838 
1839 	s_h = cb->args[0];
1840 	s_idx = idx = cb->args[1];
1841 
1842 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1843 		idx = 0;
1844 		head = &net->dev_index_head[h];
1845 		rcu_read_lock();
1846 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1847 			  net->dev_base_seq;
1848 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1849 			if (idx < s_idx)
1850 				goto cont;
1851 			in_dev = __in_dev_get_rcu(dev);
1852 			if (!in_dev)
1853 				goto cont;
1854 
1855 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1856 						      &in_dev->cnf,
1857 						      NETLINK_CB(cb->skb).portid,
1858 						      cb->nlh->nlmsg_seq,
1859 						      RTM_NEWNETCONF,
1860 						      NLM_F_MULTI,
1861 						      -1) <= 0) {
1862 				rcu_read_unlock();
1863 				goto done;
1864 			}
1865 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1866 cont:
1867 			idx++;
1868 		}
1869 		rcu_read_unlock();
1870 	}
1871 	if (h == NETDEV_HASHENTRIES) {
1872 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1873 					      net->ipv4.devconf_all,
1874 					      NETLINK_CB(cb->skb).portid,
1875 					      cb->nlh->nlmsg_seq,
1876 					      RTM_NEWNETCONF, NLM_F_MULTI,
1877 					      -1) <= 0)
1878 			goto done;
1879 		else
1880 			h++;
1881 	}
1882 	if (h == NETDEV_HASHENTRIES + 1) {
1883 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1884 					      net->ipv4.devconf_dflt,
1885 					      NETLINK_CB(cb->skb).portid,
1886 					      cb->nlh->nlmsg_seq,
1887 					      RTM_NEWNETCONF, NLM_F_MULTI,
1888 					      -1) <= 0)
1889 			goto done;
1890 		else
1891 			h++;
1892 	}
1893 done:
1894 	cb->args[0] = h;
1895 	cb->args[1] = idx;
1896 
1897 	return skb->len;
1898 }
1899 
1900 #ifdef CONFIG_SYSCTL
1901 
1902 static void devinet_copy_dflt_conf(struct net *net, int i)
1903 {
1904 	struct net_device *dev;
1905 
1906 	rcu_read_lock();
1907 	for_each_netdev_rcu(net, dev) {
1908 		struct in_device *in_dev;
1909 
1910 		in_dev = __in_dev_get_rcu(dev);
1911 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1912 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1913 	}
1914 	rcu_read_unlock();
1915 }
1916 
1917 /* called with RTNL locked */
1918 static void inet_forward_change(struct net *net)
1919 {
1920 	struct net_device *dev;
1921 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1922 
1923 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1924 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1925 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1926 				    NETCONFA_IFINDEX_ALL,
1927 				    net->ipv4.devconf_all);
1928 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1929 				    NETCONFA_IFINDEX_DEFAULT,
1930 				    net->ipv4.devconf_dflt);
1931 
1932 	for_each_netdev(net, dev) {
1933 		struct in_device *in_dev;
1934 		if (on)
1935 			dev_disable_lro(dev);
1936 		rcu_read_lock();
1937 		in_dev = __in_dev_get_rcu(dev);
1938 		if (in_dev) {
1939 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1940 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941 						    dev->ifindex, &in_dev->cnf);
1942 		}
1943 		rcu_read_unlock();
1944 	}
1945 }
1946 
1947 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1948 			     void __user *buffer,
1949 			     size_t *lenp, loff_t *ppos)
1950 {
1951 	int old_value = *(int *)ctl->data;
1952 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1953 	int new_value = *(int *)ctl->data;
1954 
1955 	if (write) {
1956 		struct ipv4_devconf *cnf = ctl->extra1;
1957 		struct net *net = ctl->extra2;
1958 		int i = (int *)ctl->data - cnf->data;
1959 
1960 		set_bit(i, cnf->state);
1961 
1962 		if (cnf == net->ipv4.devconf_dflt)
1963 			devinet_copy_dflt_conf(net, i);
1964 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1965 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1966 			if ((new_value == 0) && (old_value != 0))
1967 				rt_cache_flush(net);
1968 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1969 		    new_value != old_value) {
1970 			int ifindex;
1971 
1972 			if (cnf == net->ipv4.devconf_dflt)
1973 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1974 			else if (cnf == net->ipv4.devconf_all)
1975 				ifindex = NETCONFA_IFINDEX_ALL;
1976 			else {
1977 				struct in_device *idev =
1978 					container_of(cnf, struct in_device,
1979 						     cnf);
1980 				ifindex = idev->dev->ifindex;
1981 			}
1982 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1983 						    ifindex, cnf);
1984 		}
1985 	}
1986 
1987 	return ret;
1988 }
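/*
 * devinet_conf_proc() is the shared handler for most entries in the
 * table below.  The knob index is recovered by pointer arithmetic
 * (ctl->data points into cnf->data[]), the matching cnf->state bit is
 * set so the value stops tracking the default, and for the few knobs
 * that need it (accept_local, route_localnet, rp_filter) a route
 * cache flush or an RTM_NEWNETCONF notification is issued as well.
 */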
1989 
1990 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1991 				  void __user *buffer,
1992 				  size_t *lenp, loff_t *ppos)
1993 {
1994 	int *valp = ctl->data;
1995 	int val = *valp;
1996 	loff_t pos = *ppos;
1997 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1998 
1999 	if (write && *valp != val) {
2000 		struct net *net = ctl->extra2;
2001 
2002 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2003 			if (!rtnl_trylock()) {
2004 				/* Restore the original values before restarting */
2005 				*valp = val;
2006 				*ppos = pos;
2007 				return restart_syscall();
2008 			}
2009 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2010 				inet_forward_change(net);
2011 			} else {
2012 				struct ipv4_devconf *cnf = ctl->extra1;
2013 				struct in_device *idev =
2014 					container_of(cnf, struct in_device, cnf);
2015 				if (*valp)
2016 					dev_disable_lro(idev->dev);
2017 				inet_netconf_notify_devconf(net,
2018 							    NETCONFA_FORWARDING,
2019 							    idev->dev->ifindex,
2020 							    cnf);
2021 			}
2022 			rtnl_unlock();
2023 			rt_cache_flush(net);
2024 		} else
2025 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2026 						    NETCONFA_IFINDEX_DEFAULT,
2027 						    net->ipv4.devconf_dflt);
2028 	}
2029 
2030 	return ret;
2031 }
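/*
 * Forwarding changes must be made under RTNL, but sysctl writes do
 * not hold it.  rtnl_trylock() is used rather than rtnl_lock(): if
 * the lock cannot be taken, the old value and file position are
 * restored and restart_syscall() makes user space retry the write,
 * avoiding a potential deadlock between RTNL and the sysctl code.
 */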
2032 
2033 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2034 				void __user *buffer,
2035 				size_t *lenp, loff_t *ppos)
2036 {
2037 	int *valp = ctl->data;
2038 	int val = *valp;
2039 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2040 	struct net *net = ctl->extra2;
2041 
2042 	if (write && *valp != val)
2043 		rt_cache_flush(net);
2044 
2045 	return ret;
2046 }
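/* Generic handler for knobs whose side effect is that cached routes
 * become stale: flush them whenever the value actually changes.
 */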
2047 
2048 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2049 	{ \
2050 		.procname	= name, \
2051 		.data		= ipv4_devconf.data + \
2052 				  IPV4_DEVCONF_ ## attr - 1, \
2053 		.maxlen		= sizeof(int), \
2054 		.mode		= mval, \
2055 		.proc_handler	= proc, \
2056 		.extra1		= &ipv4_devconf, \
2057 	}
2058 
2059 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2060 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2061 
2062 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2063 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2064 
2065 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2066 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2067 
2068 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2069 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2070 
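/*
 * The macros above expand to one ctl_table entry per devconf field,
 * each initially pointing into the static ipv4_devconf template.  The
 * _RW/_RO variants differ only in file mode; the FLUSHING variant
 * routes writes through ipv4_doint_and_flush() so the change also
 * drops cached routes.
 */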
2071 static struct devinet_sysctl_table {
2072 	struct ctl_table_header *sysctl_header;
2073 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2074 } devinet_sysctl = {
2075 	.devinet_vars = {
2076 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2077 					     devinet_sysctl_forward),
2078 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2079 
2080 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2081 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2082 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2083 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2084 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2085 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2086 					"accept_source_route"),
2087 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2088 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2089 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2090 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2091 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2092 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2093 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2094 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2095 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2096 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2097 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2098 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2099 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2100 
2101 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2102 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2103 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2104 					      "force_igmp_version"),
2105 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2106 					      "promote_secondaries"),
2107 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2108 					      "route_localnet"),
2109 	},
2110 };
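/*
 * This table is only a template: one private copy is made for "all",
 * one for "default" and one per network device, each rebased onto its
 * own ipv4_devconf before being registered.
 */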
2111 
2112 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2113 					struct ipv4_devconf *p)
2114 {
2115 	int i;
2116 	struct devinet_sysctl_table *t;
2117 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2118 
2119 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2120 	if (!t)
2121 		goto out;
2122 
2123 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2124 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2125 		t->devinet_vars[i].extra1 = p;
2126 		t->devinet_vars[i].extra2 = net;
2127 	}
2128 
2129 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2130 
2131 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2132 	if (!t->sysctl_header)
2133 		goto free;
2134 
2135 	p->sysctl = t;
2136 	return 0;
2137 
2138 free:
2139 	kfree(t);
2140 out:
2141 	return -ENOBUFS;
2142 }
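/*
 * The kmemdup'd entries still point into the static ipv4_devconf, so
 * the byte offset between the target devconf 'p' and the template is
 * added to every .data pointer to rebase them onto 'p'.  The last
 * slot is left untouched as the all-zero terminator.  The directory
 * then shows up as /proc/sys/net/ipv4/conf/<dev_name>/.
 */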
2143 
2144 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2145 {
2146 	struct devinet_sysctl_table *t = cnf->sysctl;
2147 
2148 	if (t == NULL)
2149 		return;
2150 
2151 	cnf->sysctl = NULL;
2152 	unregister_net_sysctl_table(t->sysctl_header);
2153 	kfree(t);
2154 }
2155 
2156 static void devinet_sysctl_register(struct in_device *idev)
2157 {
2158 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2159 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2160 					&idev->cnf);
2161 }
2162 
2163 static void devinet_sysctl_unregister(struct in_device *idev)
2164 {
2165 	__devinet_sysctl_unregister(&idev->cnf);
2166 	neigh_sysctl_unregister(idev->arp_parms);
2167 }
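/*
 * Registration and removal of the per-device conf directory are
 * paired with the matching IPv4 neighbour (ARP) sysctl directory, so
 * both appear and disappear together with the in_device.
 */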
2168 
2169 static struct ctl_table ctl_forward_entry[] = {
2170 	{
2171 		.procname	= "ip_forward",
2172 		.data		= &ipv4_devconf.data[
2173 					IPV4_DEVCONF_FORWARDING - 1],
2174 		.maxlen		= sizeof(int),
2175 		.mode		= 0644,
2176 		.proc_handler	= devinet_sysctl_forward,
2177 		.extra1		= &ipv4_devconf,
2178 		.extra2		= &init_net,
2179 	},
2180 	{ },
2181 };
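/*
 * ctl_forward_entry backs the legacy net.ipv4.ip_forward sysctl; its
 * .data aliases the FORWARDING slot of the "all" devconf, so writing
 * it is equivalent to writing net.ipv4.conf.all.forwarding.
 */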
2182 #endif
2183 
2184 static __net_init int devinet_init_net(struct net *net)
2185 {
2186 	int err;
2187 	struct ipv4_devconf *all, *dflt;
2188 #ifdef CONFIG_SYSCTL
2189 	struct ctl_table *tbl = ctl_forward_entry;
2190 	struct ctl_table_header *forw_hdr;
2191 #endif
2192 
2193 	err = -ENOMEM;
2194 	all = &ipv4_devconf;
2195 	dflt = &ipv4_devconf_dflt;
2196 
2197 	if (!net_eq(net, &init_net)) {
2198 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2199 		if (all == NULL)
2200 			goto err_alloc_all;
2201 
2202 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2203 		if (dflt == NULL)
2204 			goto err_alloc_dflt;
2205 
2206 #ifdef CONFIG_SYSCTL
2207 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2208 		if (tbl == NULL)
2209 			goto err_alloc_ctl;
2210 
2211 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2212 		tbl[0].extra1 = all;
2213 		tbl[0].extra2 = net;
2214 #endif
2215 	}
2216 
2217 #ifdef CONFIG_SYSCTL
2218 	err = __devinet_sysctl_register(net, "all", all);
2219 	if (err < 0)
2220 		goto err_reg_all;
2221 
2222 	err = __devinet_sysctl_register(net, "default", dflt);
2223 	if (err < 0)
2224 		goto err_reg_dflt;
2225 
2226 	err = -ENOMEM;
2227 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2228 	if (forw_hdr == NULL)
2229 		goto err_reg_ctl;
2230 	net->ipv4.forw_hdr = forw_hdr;
2231 #endif
2232 
2233 	net->ipv4.devconf_all = all;
2234 	net->ipv4.devconf_dflt = dflt;
2235 	return 0;
2236 
2237 #ifdef CONFIG_SYSCTL
2238 err_reg_ctl:
2239 	__devinet_sysctl_unregister(dflt);
2240 err_reg_dflt:
2241 	__devinet_sysctl_unregister(all);
2242 err_reg_all:
2243 	if (tbl != ctl_forward_entry)
2244 		kfree(tbl);
2245 err_alloc_ctl:
2246 #endif
2247 	if (dflt != &ipv4_devconf_dflt)
2248 		kfree(dflt);
2249 err_alloc_dflt:
2250 	if (all != &ipv4_devconf)
2251 		kfree(all);
2252 err_alloc_all:
2253 	return err;
2254 }
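/*
 * The initial namespace keeps using the static ipv4_devconf,
 * ipv4_devconf_dflt and ctl_forward_entry objects; every other
 * namespace works on kmemdup'd private copies.  The error path
 * unwinds in reverse order and only frees what was actually
 * duplicated, hence the comparisons against the static objects
 * before each kfree().
 */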
2255 
2256 static __net_exit void devinet_exit_net(struct net *net)
2257 {
2258 #ifdef CONFIG_SYSCTL
2259 	struct ctl_table *tbl;
2260 
2261 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2262 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2263 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2264 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2265 	kfree(tbl);
2266 #endif
2267 	kfree(net->ipv4.devconf_dflt);
2268 	kfree(net->ipv4.devconf_all);
2269 }
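/*
 * Namespace teardown mirrors the error path above: unregister the
 * ip_forward table and the "all"/"default" directories, then free the
 * per-namespace copies.  init_net itself is never torn down, so the
 * unconditional kfree() of devconf_all/devconf_dflt never hits the
 * static objects.
 */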
2270 
2271 static __net_initdata struct pernet_operations devinet_ops = {
2272 	.init = devinet_init_net,
2273 	.exit = devinet_exit_net,
2274 };
2275 
2276 static struct rtnl_af_ops inet_af_ops = {
2277 	.family		  = AF_INET,
2278 	.fill_link_af	  = inet_fill_link_af,
2279 	.get_link_af_size = inet_get_link_af_size,
2280 	.validate_link_af = inet_validate_link_af,
2281 	.set_link_af	  = inet_set_link_af,
2282 };
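/*
 * inet_af_ops handles the AF_INET portion of the IFLA_AF_SPEC
 * attribute in rtnetlink link messages, letting per-device IPv4
 * settings be read and changed together with the link itself.
 */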
2283 
2284 void __init devinet_init(void)
2285 {
2286 	int i;
2287 
2288 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2289 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2290 
2291 	register_pernet_subsys(&devinet_ops);
2292 
2293 	register_gifconf(PF_INET, inet_gifconf);
2294 	register_netdevice_notifier(&ip_netdev_notifier);
2295 
2296 	schedule_delayed_work(&check_lifetime_work, 0);
2297 
2298 	rtnl_af_register(&inet_af_ops);
2299 
2300 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2301 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2302 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2303 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2304 		      inet_netconf_dump_devconf, NULL);
2305 }
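/*
 * devinet_init() wires the IPv4 address layer into the core at boot:
 * per-namespace conf state via devinet_ops, the SIOCGIFCONF helper,
 * the netdevice notifier, the periodic address-lifetime worker and
 * the rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR, RTM_GETADDR
 * and RTM_GETNETCONF.
 */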
2306 
2307