xref: /openbmc/linux/net/ipv4/devinet.c (revision 9d749629)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 	},
77 };
78 
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 	},
87 };
88 
/*
 * Read attribute @attr from the per-namespace default devconf of @net.
 * The @net argument is parenthesised so that any pointer expression
 * (not just a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91 
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 	[IFA_LOCAL]     	= { .type = NLA_U32 },
94 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
95 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
96 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
98 };
99 
100 #define IN4_ADDR_HSIZE_SHIFT	8
101 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
102 
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 	struct hlist_node *node;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146 		if (ifa->ifa_local == addr) {
147 			struct net_device *dev = ifa->ifa_dev->dev;
148 
149 			if (!net_eq(dev_net(dev), net))
150 				continue;
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fallback to FIB local table so that communication
161 		 * over loopback subnets work.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175 
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 			 int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192 
193 /* Locks all the inet devices. */
194 
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199 
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 	if (ifa->ifa_dev)
204 		in_dev_put(ifa->ifa_dev);
205 	kfree(ifa);
206 }
207 
208 static void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212 
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215 	struct net_device *dev = idev->dev;
216 
217 	WARN_ON(idev->ifa_list);
218 	WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222 	dev_put(dev);
223 	if (!idev->dead)
224 		pr_err("Freeing alive in_device %p\n", idev);
225 	else
226 		kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229 
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232 	struct in_device *in_dev;
233 
234 	ASSERT_RTNL();
235 
236 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237 	if (!in_dev)
238 		goto out;
239 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240 			sizeof(in_dev->cnf));
241 	in_dev->cnf.sysctl = NULL;
242 	in_dev->dev = dev;
243 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244 	if (!in_dev->arp_parms)
245 		goto out_kfree;
246 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247 		dev_disable_lro(dev);
248 	/* Reference in_dev->dev */
249 	dev_hold(dev);
250 	/* Account for reference dev->ip_ptr (below) */
251 	in_dev_hold(in_dev);
252 
253 	devinet_sysctl_register(in_dev);
254 	ip_mc_init_dev(in_dev);
255 	if (dev->flags & IFF_UP)
256 		ip_mc_up(in_dev);
257 
258 	/* we can receive as soon as ip_ptr is set -- do this last */
259 	rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261 	return in_dev;
262 out_kfree:
263 	kfree(in_dev);
264 	in_dev = NULL;
265 	goto out;
266 }
267 
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
271 	in_dev_put(idev);
272 }
273 
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276 	struct in_ifaddr *ifa;
277 	struct net_device *dev;
278 
279 	ASSERT_RTNL();
280 
281 	dev = in_dev->dev;
282 
283 	in_dev->dead = 1;
284 
285 	ip_mc_destroy_dev(in_dev);
286 
287 	while ((ifa = in_dev->ifa_list) != NULL) {
288 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289 		inet_free_ifa(ifa);
290 	}
291 
292 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 
294 	devinet_sysctl_unregister(in_dev);
295 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296 	arp_ifdown(dev);
297 
298 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300 
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303 	rcu_read_lock();
304 	for_primary_ifa(in_dev) {
305 		if (inet_ifa_match(a, ifa)) {
306 			if (!b || inet_ifa_match(b, ifa)) {
307 				rcu_read_unlock();
308 				return 1;
309 			}
310 		}
311 	} endfor_ifa(in_dev);
312 	rcu_read_unlock();
313 	return 0;
314 }
315 
/*
 * __inet_del_ifa - remove the address *@ifap from @in_dev's address list.
 * @in_dev:  device the address belongs to
 * @ifap:    link (list head or previous entry's ifa_next) pointing at the
 *           address to delete
 * @destroy: when non-zero the removed address is freed with inet_free_ifa()
 * @nlh:     netlink request passed through to rtmsg_ifa()
 * @portid:  netlink port id passed through to rtmsg_ifa()
 *
 * Must be called under RTNL.  When the deleted address is a primary one,
 * the secondaries that follow it with the same mask and a matching address
 * are either deleted as well, or - if IN_DEV_PROMOTE_SECONDARIES() is set -
 * the first of them is promoted to primary.
 */
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317 			 int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319 	struct in_ifaddr *promote = NULL;
320 	struct in_ifaddr *ifa, *ifa1 = *ifap;
321 	struct in_ifaddr *last_prim = in_dev->ifa_list;
322 	struct in_ifaddr *prev_prom = NULL;
323 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324 
325 	ASSERT_RTNL();
326 
327 	/* 1. Deleting primary ifaddr forces deletion all secondaries
328 	 * unless alias promotion is set
329 	 **/
330 
331 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333 
334 		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope is not below ifa1's;
			 * a promoted address is re-linked right after it.
			 */
335 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336 			    ifa1->ifa_scope <= ifa->ifa_scope)
337 				last_prim = ifa;
338 
			/* Skip entries that are not secondaries of ifa1's subnet,
			 * remembering the entry preceding a promotion candidate.
			 */
339 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340 			    ifa1->ifa_mask != ifa->ifa_mask ||
341 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
342 				ifap1 = &ifa->ifa_next;
343 				prev_prom = ifa;
344 				continue;
345 			}
346 
347 			if (!do_promote) {
				/* No promotion: unhash, unlink, announce and free
				 * this secondary; ifap1 is not advanced because
				 * *ifap1 now points at the next entry.
				 */
348 				inet_hash_remove(ifa);
349 				*ifap1 = ifa->ifa_next;
350 
351 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352 				blocking_notifier_call_chain(&inetaddr_chain,
353 						NETDEV_DOWN, ifa);
354 				inet_free_ifa(ifa);
355 			} else {
				/* First matching secondary becomes the candidate. */
356 				promote = ifa;
357 				break;
358 			}
359 		}
360 	}
361 
362 	/* On promotion all secondaries from subnet are changing
363 	 * the primary IP, we must remove all their routes silently
364 	 * and later to add them back with new prefsrc. Do this
365 	 * while all addresses are on the device list.
366 	 */
367 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368 		if (ifa1->ifa_mask == ifa->ifa_mask &&
369 		    inet_ifa_match(ifa1->ifa_address, ifa))
370 			fib_del_ifaddr(ifa, ifa1);
371 	}
372 
373 	/* 2. Unlink it */
374 
375 	*ifap = ifa1->ifa_next;
376 	inet_hash_remove(ifa1);
377 
378 	/* 3. Announce address deletion */
379 
380 	/* Send message first, then call notifier.
381 	   At first sight, FIB update triggered by notifier
382 	   will refer to already deleted ifaddr, that could confuse
383 	   netlink listeners. It is not true: look, gated sees
384 	   that route deleted and if it still thinks that ifaddr
385 	   is valid, it will try to restore deleted routes... Grr.
386 	   So that, this order is correct.
387 	 */
388 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390 
391 	if (promote) {
		/* Saved before re-linking: the walk that re-adds routes below
		 * starts from the entry that followed the promoted address.
		 */
392 		struct in_ifaddr *next_sec = promote->ifa_next;
393 
		/* prev_prom is only set when some entry was skipped before the
		 * candidate; in that case move the candidate after last_prim.
		 */
394 		if (prev_prom) {
395 			prev_prom->ifa_next = promote->ifa_next;
396 			promote->ifa_next = last_prim->ifa_next;
397 			last_prim->ifa_next = promote;
398 		}
399 
400 		promote->ifa_flags &= ~IFA_F_SECONDARY;
401 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402 		blocking_notifier_call_chain(&inetaddr_chain,
403 				NETDEV_UP, promote);
		/* Re-add routes for the remaining addresses of the subnet. */
404 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405 			if (ifa1->ifa_mask != ifa->ifa_mask ||
406 			    !inet_ifa_match(ifa1->ifa_address, ifa))
407 					continue;
408 			fib_add_ifaddr(ifa);
409 		}
410 
411 	}
412 	if (destroy)
413 		inet_free_ifa(ifa1);
414 }
415 
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417 			 int destroy)
418 {
419 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421 
422 static void check_lifetime(struct work_struct *work);
423 
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425 
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427 			     u32 portid)
428 {
429 	struct in_device *in_dev = ifa->ifa_dev;
430 	struct in_ifaddr *ifa1, **ifap, **last_primary;
431 
432 	ASSERT_RTNL();
433 
434 	if (!ifa->ifa_local) {
435 		inet_free_ifa(ifa);
436 		return 0;
437 	}
438 
439 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
440 	last_primary = &in_dev->ifa_list;
441 
442 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443 	     ifap = &ifa1->ifa_next) {
444 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445 		    ifa->ifa_scope <= ifa1->ifa_scope)
446 			last_primary = &ifa1->ifa_next;
447 		if (ifa1->ifa_mask == ifa->ifa_mask &&
448 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
449 			if (ifa1->ifa_local == ifa->ifa_local) {
450 				inet_free_ifa(ifa);
451 				return -EEXIST;
452 			}
453 			if (ifa1->ifa_scope != ifa->ifa_scope) {
454 				inet_free_ifa(ifa);
455 				return -EINVAL;
456 			}
457 			ifa->ifa_flags |= IFA_F_SECONDARY;
458 		}
459 	}
460 
461 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462 		net_srandom(ifa->ifa_local);
463 		ifap = last_primary;
464 	}
465 
466 	ifa->ifa_next = *ifap;
467 	*ifap = ifa;
468 
469 	inet_hash_insert(dev_net(in_dev->dev), ifa);
470 
471 	cancel_delayed_work(&check_lifetime_work);
472 	schedule_delayed_work(&check_lifetime_work, 0);
473 
474 	/* Send message first, then call notifier.
475 	   Notifier will trigger FIB update, so that
476 	   listeners of netlink will know about new ifaddr */
477 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479 
480 	return 0;
481 }
482 
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485 	return __inet_insert_ifa(ifa, NULL, 0);
486 }
487 
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
491 
492 	ASSERT_RTNL();
493 
494 	if (!in_dev) {
495 		inet_free_ifa(ifa);
496 		return -ENOBUFS;
497 	}
498 	ipv4_devconf_setall(in_dev);
499 	if (ifa->ifa_dev != in_dev) {
500 		WARN_ON(ifa->ifa_dev);
501 		in_dev_hold(in_dev);
502 		ifa->ifa_dev = in_dev;
503 	}
504 	if (ipv4_is_loopback(ifa->ifa_local))
505 		ifa->ifa_scope = RT_SCOPE_HOST;
506 	return inet_insert_ifa(ifa);
507 }
508 
509 /* Caller must hold RCU or RTNL :
510  * We dont take a reference on found in_device
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514 	struct net_device *dev;
515 	struct in_device *in_dev = NULL;
516 
517 	rcu_read_lock();
518 	dev = dev_get_by_index_rcu(net, ifindex);
519 	if (dev)
520 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521 	rcu_read_unlock();
522 	return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525 
526 /* Called only from RTNL semaphored context. No locks. */
527 
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529 				    __be32 mask)
530 {
531 	ASSERT_RTNL();
532 
533 	for_primary_ifa(in_dev) {
534 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535 			return ifa;
536 	} endfor_ifa(in_dev);
537 	return NULL;
538 }
539 
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
541 {
542 	struct net *net = sock_net(skb->sk);
543 	struct nlattr *tb[IFA_MAX+1];
544 	struct in_device *in_dev;
545 	struct ifaddrmsg *ifm;
546 	struct in_ifaddr *ifa, **ifap;
547 	int err = -EINVAL;
548 
549 	ASSERT_RTNL();
550 
551 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552 	if (err < 0)
553 		goto errout;
554 
555 	ifm = nlmsg_data(nlh);
556 	in_dev = inetdev_by_index(net, ifm->ifa_index);
557 	if (in_dev == NULL) {
558 		err = -ENODEV;
559 		goto errout;
560 	}
561 
562 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563 	     ifap = &ifa->ifa_next) {
564 		if (tb[IFA_LOCAL] &&
565 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566 			continue;
567 
568 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569 			continue;
570 
571 		if (tb[IFA_ADDRESS] &&
572 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574 			continue;
575 
576 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577 		return 0;
578 	}
579 
580 	err = -EADDRNOTAVAIL;
581 errout:
582 	return err;
583 }
584 
585 #define INFINITY_LIFE_TIME	0xFFFFFFFF
586 
587 static void check_lifetime(struct work_struct *work)
588 {
589 	unsigned long now, next, next_sec, next_sched;
590 	struct in_ifaddr *ifa;
591 	struct hlist_node *node;
592 	int i;
593 
594 	now = jiffies;
595 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596 
597 	rcu_read_lock();
598 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
599 		hlist_for_each_entry_rcu(ifa, node,
600 					 &inet_addr_lst[i], hash) {
601 			unsigned long age;
602 
603 			if (ifa->ifa_flags & IFA_F_PERMANENT)
604 				continue;
605 
606 			/* We try to batch several events at once. */
607 			age = (now - ifa->ifa_tstamp +
608 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
609 
610 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
611 			    age >= ifa->ifa_valid_lft) {
612 				struct in_ifaddr **ifap ;
613 
614 				rtnl_lock();
615 				for (ifap = &ifa->ifa_dev->ifa_list;
616 				     *ifap != NULL; ifap = &ifa->ifa_next) {
617 					if (*ifap == ifa)
618 						inet_del_ifa(ifa->ifa_dev,
619 							     ifap, 1);
620 				}
621 				rtnl_unlock();
622 			} else if (ifa->ifa_preferred_lft ==
623 				   INFINITY_LIFE_TIME) {
624 				continue;
625 			} else if (age >= ifa->ifa_preferred_lft) {
626 				if (time_before(ifa->ifa_tstamp +
627 						ifa->ifa_valid_lft * HZ, next))
628 					next = ifa->ifa_tstamp +
629 					       ifa->ifa_valid_lft * HZ;
630 
631 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
632 					ifa->ifa_flags |= IFA_F_DEPRECATED;
633 					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
634 				}
635 			} else if (time_before(ifa->ifa_tstamp +
636 					       ifa->ifa_preferred_lft * HZ,
637 					       next)) {
638 				next = ifa->ifa_tstamp +
639 				       ifa->ifa_preferred_lft * HZ;
640 			}
641 		}
642 	}
643 	rcu_read_unlock();
644 
645 	next_sec = round_jiffies_up(next);
646 	next_sched = next;
647 
648 	/* If rounded timeout is accurate enough, accept it. */
649 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
650 		next_sched = next_sec;
651 
652 	now = jiffies;
653 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
654 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
655 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
656 
657 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
658 }
659 
660 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
661 			     __u32 prefered_lft)
662 {
663 	unsigned long timeout;
664 
665 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
666 
667 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
668 	if (addrconf_finite_timeout(timeout))
669 		ifa->ifa_valid_lft = timeout;
670 	else
671 		ifa->ifa_flags |= IFA_F_PERMANENT;
672 
673 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
674 	if (addrconf_finite_timeout(timeout)) {
675 		if (timeout == 0)
676 			ifa->ifa_flags |= IFA_F_DEPRECATED;
677 		ifa->ifa_preferred_lft = timeout;
678 	}
679 	ifa->ifa_tstamp = jiffies;
680 	if (!ifa->ifa_cstamp)
681 		ifa->ifa_cstamp = ifa->ifa_tstamp;
682 }
683 
684 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
685 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
686 {
687 	struct nlattr *tb[IFA_MAX+1];
688 	struct in_ifaddr *ifa;
689 	struct ifaddrmsg *ifm;
690 	struct net_device *dev;
691 	struct in_device *in_dev;
692 	int err;
693 
694 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
695 	if (err < 0)
696 		goto errout;
697 
698 	ifm = nlmsg_data(nlh);
699 	err = -EINVAL;
700 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
701 		goto errout;
702 
703 	dev = __dev_get_by_index(net, ifm->ifa_index);
704 	err = -ENODEV;
705 	if (dev == NULL)
706 		goto errout;
707 
708 	in_dev = __in_dev_get_rtnl(dev);
709 	err = -ENOBUFS;
710 	if (in_dev == NULL)
711 		goto errout;
712 
713 	ifa = inet_alloc_ifa();
714 	if (ifa == NULL)
715 		/*
716 		 * A potential indev allocation can be left alive, it stays
717 		 * assigned to its device and is destroy with it.
718 		 */
719 		goto errout;
720 
721 	ipv4_devconf_setall(in_dev);
722 	in_dev_hold(in_dev);
723 
724 	if (tb[IFA_ADDRESS] == NULL)
725 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
726 
727 	INIT_HLIST_NODE(&ifa->hash);
728 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
729 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
730 	ifa->ifa_flags = ifm->ifa_flags;
731 	ifa->ifa_scope = ifm->ifa_scope;
732 	ifa->ifa_dev = in_dev;
733 
734 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
735 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
736 
737 	if (tb[IFA_BROADCAST])
738 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
739 
740 	if (tb[IFA_LABEL])
741 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
742 	else
743 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
744 
745 	if (tb[IFA_CACHEINFO]) {
746 		struct ifa_cacheinfo *ci;
747 
748 		ci = nla_data(tb[IFA_CACHEINFO]);
749 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
750 			err = -EINVAL;
751 			goto errout;
752 		}
753 		*pvalid_lft = ci->ifa_valid;
754 		*pprefered_lft = ci->ifa_prefered;
755 	}
756 
757 	return ifa;
758 
759 errout:
760 	return ERR_PTR(err);
761 }
762 
763 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
764 {
765 	struct in_device *in_dev = ifa->ifa_dev;
766 	struct in_ifaddr *ifa1, **ifap;
767 
768 	if (!ifa->ifa_local)
769 		return NULL;
770 
771 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
772 	     ifap = &ifa1->ifa_next) {
773 		if (ifa1->ifa_mask == ifa->ifa_mask &&
774 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
775 		    ifa1->ifa_local == ifa->ifa_local)
776 			return ifa1;
777 	}
778 	return NULL;
779 }
780 
781 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
782 {
783 	struct net *net = sock_net(skb->sk);
784 	struct in_ifaddr *ifa;
785 	struct in_ifaddr *ifa_existing;
786 	__u32 valid_lft = INFINITY_LIFE_TIME;
787 	__u32 prefered_lft = INFINITY_LIFE_TIME;
788 
789 	ASSERT_RTNL();
790 
791 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
792 	if (IS_ERR(ifa))
793 		return PTR_ERR(ifa);
794 
795 	ifa_existing = find_matching_ifa(ifa);
796 	if (!ifa_existing) {
797 		/* It would be best to check for !NLM_F_CREATE here but
798 		 * userspace alreay relies on not having to provide this.
799 		 */
800 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
801 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
802 	} else {
803 		inet_free_ifa(ifa);
804 
805 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
806 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
807 			return -EEXIST;
808 
809 		set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
810 	}
811 	return 0;
812 }
813 
814 /*
815  *	Determine a default network mask, based on the IP address.
816  */
817 
818 static int inet_abc_len(__be32 addr)
819 {
820 	int rc = -1;	/* Something else, probably a multicast. */
821 
822 	if (ipv4_is_zeronet(addr))
823 		rc = 0;
824 	else {
825 		__u32 haddr = ntohl(addr);
826 
827 		if (IN_CLASSA(haddr))
828 			rc = 8;
829 		else if (IN_CLASSB(haddr))
830 			rc = 16;
831 		else if (IN_CLASSC(haddr))
832 			rc = 24;
833 	}
834 
835 	return rc;
836 }
837 
838 
/*
 * devinet_ioctl - handle the IPv4 interface address ioctls.
 * @net: namespace the request applies to
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
 *       SIOCSIFNETMASK
 * @arg: user pointer to a struct ifreq
 *
 * The ifreq is copied in, the device is looked up by the name with any
 * ":alias" suffix stripped, and the address entry is selected by label
 * (for the "get" ioctls first by label plus address when the caller passed
 * an AF_INET address).  "Get" ioctls copy the filled ifreq back to user
 * space; "set" ioctls require CAP_NET_ADMIN in the namespace's user_ns and
 * modify the entry by unlinking and re-inserting it.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM, -EINVAL (unknown command, wrong family, bad address or mask),
 * -ENODEV (no such device), -EADDRNOTAVAIL (no matching address),
 * -ENOBUFS (allocation failed), or the result of dev_change_flags() /
 * inet_set_ifa().
 */
839 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
840 {
841 	struct ifreq ifr;
842 	struct sockaddr_in sin_orig;
843 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
844 	struct in_device *in_dev;
845 	struct in_ifaddr **ifap = NULL;
846 	struct in_ifaddr *ifa = NULL;
847 	struct net_device *dev;
848 	char *colon;
849 	int ret = -EFAULT;
850 	int tryaddrmatch = 0;
851 
852 	/*
853 	 *	Fetch the caller's info block into kernel space
854 	 */
855 
856 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
857 		goto out;
	/* Force NUL termination of the user supplied name. */
858 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
859 
860 	/* save original address for comparison */
861 	memcpy(&sin_orig, sin, sizeof(*sin));
862 
	/* Temporarily cut off an alias suffix so that the device lookups
	 * below see the plain device name; it is restored after the lookup.
	 */
863 	colon = strchr(ifr.ifr_name, ':');
864 	if (colon)
865 		*colon = 0;
866 
867 	dev_load(net, ifr.ifr_name);
868 
869 	switch (cmd) {
870 	case SIOCGIFADDR:	/* Get interface address */
871 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
872 	case SIOCGIFDSTADDR:	/* Get the destination address */
873 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
874 		/* Note that these ioctls will not sleep,
875 		   so that we do not impose a lock.
876 		   One day we will be forced to put shlock here (I mean SMP)
877 		 */
878 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
879 		memset(sin, 0, sizeof(*sin));
880 		sin->sin_family = AF_INET;
881 		break;
882 
883 	case SIOCSIFFLAGS:
884 		ret = -EPERM;
885 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
886 			goto out;
887 		break;
888 	case SIOCSIFADDR:	/* Set interface address (and family) */
889 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
890 	case SIOCSIFDSTADDR:	/* Set the destination address */
891 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
892 		ret = -EPERM;
893 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
894 			goto out;
895 		ret = -EINVAL;
896 		if (sin->sin_family != AF_INET)
897 			goto out;
898 		break;
899 	default:
900 		ret = -EINVAL;
901 		goto out;
902 	}
903 
904 	rtnl_lock();
905 
906 	ret = -ENODEV;
907 	dev = __dev_get_by_name(net, ifr.ifr_name);
908 	if (!dev)
909 		goto done;
910 
	/* Restore the full (possibly aliased) name for label comparison. */
911 	if (colon)
912 		*colon = ':';
913 
914 	in_dev = __in_dev_get_rtnl(dev);
915 	if (in_dev) {
916 		if (tryaddrmatch) {
917 			/* Matthias Andree */
918 			/* compare label and address (4.4BSD style) */
919 			/* note: we only do this for a limited set of ioctls
920 			   and only if the original address family was AF_INET.
921 			   This is checked above. */
922 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
923 			     ifap = &ifa->ifa_next) {
924 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
925 				    sin_orig.sin_addr.s_addr ==
926 							ifa->ifa_local) {
927 					break; /* found */
928 				}
929 			}
930 		}
931 		/* we didn't get a match, maybe the application is
932 		   4.3BSD-style and passed in junk so we fall back to
933 		   comparing just the label */
934 		if (!ifa) {
935 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
936 			     ifap = &ifa->ifa_next)
937 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
938 					break;
939 		}
940 	}
941 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address entry.
	 */
942 	ret = -EADDRNOTAVAIL;
943 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
944 		goto done;
945 
946 	switch (cmd) {
947 	case SIOCGIFADDR:	/* Get interface address */
948 		sin->sin_addr.s_addr = ifa->ifa_local;
949 		goto rarok;
950 
951 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
952 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
953 		goto rarok;
954 
955 	case SIOCGIFDSTADDR:	/* Get the destination address */
956 		sin->sin_addr.s_addr = ifa->ifa_address;
957 		goto rarok;
958 
959 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
960 		sin->sin_addr.s_addr = ifa->ifa_mask;
961 		goto rarok;
962 
963 	case SIOCSIFFLAGS:
		/* With an alias name only the alias address is affected:
		 * clearing IFF_UP deletes it.  Otherwise the flags are
		 * applied to the device itself.
		 */
964 		if (colon) {
965 			ret = -EADDRNOTAVAIL;
966 			if (!ifa)
967 				break;
968 			ret = 0;
969 			if (!(ifr.ifr_flags & IFF_UP))
970 				inet_del_ifa(in_dev, ifap, 1);
971 			break;
972 		}
973 		ret = dev_change_flags(dev, ifr.ifr_flags);
974 		break;
975 
976 	case SIOCSIFADDR:	/* Set interface address (and family) */
977 		ret = -EINVAL;
978 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
979 			break;
980 
981 		if (!ifa) {
			/* No entry yet: allocate one, labelled with the alias
			 * name if given, else with the device name.
			 */
982 			ret = -ENOBUFS;
983 			ifa = inet_alloc_ifa();
984 			if (!ifa)
985 				break;
986 			INIT_HLIST_NODE(&ifa->hash);
987 			if (colon)
988 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
989 			else
990 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
991 		} else {
			/* Existing entry: nothing to do if unchanged, otherwise
			 * unlink it (without freeing) so it can be re-inserted.
			 */
992 			ret = 0;
993 			if (ifa->ifa_local == sin->sin_addr.s_addr)
994 				break;
995 			inet_del_ifa(in_dev, ifap, 0);
996 			ifa->ifa_broadcast = 0;
997 			ifa->ifa_scope = 0;
998 		}
999 
1000 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1001 
		/* Non point-to-point devices get the classful default mask
		 * (and broadcast address where applicable); point-to-point
		 * devices get a /32.
		 */
1002 		if (!(dev->flags & IFF_POINTOPOINT)) {
1003 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1004 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1005 			if ((dev->flags & IFF_BROADCAST) &&
1006 			    ifa->ifa_prefixlen < 31)
1007 				ifa->ifa_broadcast = ifa->ifa_address |
1008 						     ~ifa->ifa_mask;
1009 		} else {
1010 			ifa->ifa_prefixlen = 32;
1011 			ifa->ifa_mask = inet_make_mask(32);
1012 		}
1013 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1014 		ret = inet_set_ifa(dev, ifa);
1015 		break;
1016 
1017 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1018 		ret = 0;
1019 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* NOTE(review): the result of inet_insert_ifa() is not
			 * propagated here; ret stays 0.
			 */
1020 			inet_del_ifa(in_dev, ifap, 0);
1021 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1022 			inet_insert_ifa(ifa);
1023 		}
1024 		break;
1025 
1026 	case SIOCSIFDSTADDR:	/* Set the destination address */
1027 		ret = 0;
1028 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1029 			break;
1030 		ret = -EINVAL;
1031 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1032 			break;
1033 		ret = 0;
1034 		inet_del_ifa(in_dev, ifap, 0);
1035 		ifa->ifa_address = sin->sin_addr.s_addr;
1036 		inet_insert_ifa(ifa);
1037 		break;
1038 
1039 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1040 
1041 		/*
1042 		 *	The mask we set must be legal.
1043 		 */
1044 		ret = -EINVAL;
1045 		if (bad_mask(sin->sin_addr.s_addr, 0))
1046 			break;
1047 		ret = 0;
1048 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1049 			__be32 old_mask = ifa->ifa_mask;
1050 			inet_del_ifa(in_dev, ifap, 0);
1051 			ifa->ifa_mask = sin->sin_addr.s_addr;
1052 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1053 
1054 			/* See if current broadcast address matches
1055 			 * with current netmask, then recalculate
1056 			 * the broadcast address. Otherwise it's a
1057 			 * funny address, so don't touch it since
1058 			 * the user seems to know what (s)he's doing...
1059 			 */
1060 			if ((dev->flags & IFF_BROADCAST) &&
1061 			    (ifa->ifa_prefixlen < 31) &&
1062 			    (ifa->ifa_broadcast ==
1063 			     (ifa->ifa_local|~old_mask))) {
1064 				ifa->ifa_broadcast = (ifa->ifa_local |
1065 						      ~sin->sin_addr.s_addr);
1066 			}
1067 			inet_insert_ifa(ifa);
1068 		}
1069 		break;
1070 	}
1071 done:
1072 	rtnl_unlock();
1073 out:
1074 	return ret;
	/* Exit path of the "get" ioctls: unlock, then copy the result out. */
1075 rarok:
1076 	rtnl_unlock();
1077 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1078 	goto out;
1079 }
1080 
1081 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1082 {
1083 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1084 	struct in_ifaddr *ifa;
1085 	struct ifreq ifr;
1086 	int done = 0;
1087 
1088 	if (!in_dev)
1089 		goto out;
1090 
1091 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092 		if (!buf) {
1093 			done += sizeof(ifr);
1094 			continue;
1095 		}
1096 		if (len < (int) sizeof(ifr))
1097 			break;
1098 		memset(&ifr, 0, sizeof(struct ifreq));
1099 		if (ifa->ifa_label)
1100 			strcpy(ifr.ifr_name, ifa->ifa_label);
1101 		else
1102 			strcpy(ifr.ifr_name, dev->name);
1103 
1104 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1105 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1106 								ifa->ifa_local;
1107 
1108 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1109 			done = -EFAULT;
1110 			break;
1111 		}
1112 		buf  += sizeof(struct ifreq);
1113 		len  -= sizeof(struct ifreq);
1114 		done += sizeof(struct ifreq);
1115 	}
1116 out:
1117 	return done;
1118 }
1119 
1120 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1121 {
1122 	__be32 addr = 0;
1123 	struct in_device *in_dev;
1124 	struct net *net = dev_net(dev);
1125 
1126 	rcu_read_lock();
1127 	in_dev = __in_dev_get_rcu(dev);
1128 	if (!in_dev)
1129 		goto no_in_dev;
1130 
1131 	for_primary_ifa(in_dev) {
1132 		if (ifa->ifa_scope > scope)
1133 			continue;
1134 		if (!dst || inet_ifa_match(dst, ifa)) {
1135 			addr = ifa->ifa_local;
1136 			break;
1137 		}
1138 		if (!addr)
1139 			addr = ifa->ifa_local;
1140 	} endfor_ifa(in_dev);
1141 
1142 	if (addr)
1143 		goto out_unlock;
1144 no_in_dev:
1145 
1146 	/* Not loopback addresses on loopback should be preferred
1147 	   in this case. It is importnat that lo is the first interface
1148 	   in dev_base list.
1149 	 */
1150 	for_each_netdev_rcu(net, dev) {
1151 		in_dev = __in_dev_get_rcu(dev);
1152 		if (!in_dev)
1153 			continue;
1154 
1155 		for_primary_ifa(in_dev) {
1156 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1157 			    ifa->ifa_scope <= scope) {
1158 				addr = ifa->ifa_local;
1159 				goto out_unlock;
1160 			}
1161 		} endfor_ifa(in_dev);
1162 	}
1163 out_unlock:
1164 	rcu_read_unlock();
1165 	return addr;
1166 }
1167 EXPORT_SYMBOL(inet_select_addr);
1168 
1169 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1170 			      __be32 local, int scope)
1171 {
1172 	int same = 0;
1173 	__be32 addr = 0;
1174 
1175 	for_ifa(in_dev) {
1176 		if (!addr &&
1177 		    (local == ifa->ifa_local || !local) &&
1178 		    ifa->ifa_scope <= scope) {
1179 			addr = ifa->ifa_local;
1180 			if (same)
1181 				break;
1182 		}
1183 		if (!same) {
1184 			same = (!local || inet_ifa_match(local, ifa)) &&
1185 				(!dst || inet_ifa_match(dst, ifa));
1186 			if (same && addr) {
1187 				if (local || !dst)
1188 					break;
1189 				/* Is the selected addr into dst subnet? */
1190 				if (inet_ifa_match(addr, ifa))
1191 					break;
1192 				/* No, then can we use new local src? */
1193 				if (ifa->ifa_scope <= scope) {
1194 					addr = ifa->ifa_local;
1195 					break;
1196 				}
1197 				/* search for large dst subnet for addr */
1198 				same = 0;
1199 			}
1200 		}
1201 	} endfor_ifa(in_dev);
1202 
1203 	return same ? addr : 0;
1204 }
1205 
1206 /*
1207  * Confirm that local IP address exists using wildcards:
1208  * - in_dev: only on this interface, 0=any interface
1209  * - dst: only in the same subnet as dst, 0=any dst
1210  * - local: address, 0=autoselect the local address
1211  * - scope: maximum allowed scope value for the local address
1212  */
1213 __be32 inet_confirm_addr(struct in_device *in_dev,
1214 			 __be32 dst, __be32 local, int scope)
1215 {
1216 	__be32 addr = 0;
1217 	struct net_device *dev;
1218 	struct net *net;
1219 
1220 	if (scope != RT_SCOPE_LINK)
1221 		return confirm_addr_indev(in_dev, dst, local, scope);
1222 
1223 	net = dev_net(in_dev->dev);
1224 	rcu_read_lock();
1225 	for_each_netdev_rcu(net, dev) {
1226 		in_dev = __in_dev_get_rcu(dev);
1227 		if (in_dev) {
1228 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1229 			if (addr)
1230 				break;
1231 		}
1232 	}
1233 	rcu_read_unlock();
1234 
1235 	return addr;
1236 }
1237 EXPORT_SYMBOL(inet_confirm_addr);
1238 
1239 /*
1240  *	Device notifier
1241  */
1242 
1243 int register_inetaddr_notifier(struct notifier_block *nb)
1244 {
1245 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1246 }
1247 EXPORT_SYMBOL(register_inetaddr_notifier);
1248 
1249 int unregister_inetaddr_notifier(struct notifier_block *nb)
1250 {
1251 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1252 }
1253 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1254 
1255 /* Rename ifa_labels for a device name change. Make some effort to preserve
1256  * existing alias numbering and to create unique labels if possible.
1257 */
1258 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1259 {
1260 	struct in_ifaddr *ifa;
1261 	int named = 0;
1262 
1263 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1264 		char old[IFNAMSIZ], *dot;
1265 
1266 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1267 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1268 		if (named++ == 0)
1269 			goto skip;
1270 		dot = strchr(old, ':');
1271 		if (dot == NULL) {
1272 			sprintf(old, ":%d", named);
1273 			dot = old;
1274 		}
1275 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1276 			strcat(ifa->ifa_label, dot);
1277 		else
1278 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1279 skip:
1280 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1281 	}
1282 }
1283 
/*
 * inetdev_valid_mtu - tell whether @mtu is large enough to run IPv4.
 *
 * 68 bytes is the smallest MTU IPv4 requires a link to support (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1288 
1289 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1290 					struct in_device *in_dev)
1291 
1292 {
1293 	struct in_ifaddr *ifa;
1294 
1295 	for (ifa = in_dev->ifa_list; ifa;
1296 	     ifa = ifa->ifa_next) {
1297 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1298 			 ifa->ifa_local, dev,
1299 			 ifa->ifa_local, NULL,
1300 			 dev->dev_addr, NULL);
1301 	}
1302 }
1303 
1304 /* Called only under RTNL semaphore */
1305 
1306 static int inetdev_event(struct notifier_block *this, unsigned long event,
1307 			 void *ptr)
1308 {
1309 	struct net_device *dev = ptr;
1310 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1311 
1312 	ASSERT_RTNL();
1313 
1314 	if (!in_dev) {
1315 		if (event == NETDEV_REGISTER) {
1316 			in_dev = inetdev_init(dev);
1317 			if (!in_dev)
1318 				return notifier_from_errno(-ENOMEM);
1319 			if (dev->flags & IFF_LOOPBACK) {
1320 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1321 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1322 			}
1323 		} else if (event == NETDEV_CHANGEMTU) {
1324 			/* Re-enabling IP */
1325 			if (inetdev_valid_mtu(dev->mtu))
1326 				in_dev = inetdev_init(dev);
1327 		}
1328 		goto out;
1329 	}
1330 
1331 	switch (event) {
1332 	case NETDEV_REGISTER:
1333 		pr_debug("%s: bug\n", __func__);
1334 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1335 		break;
1336 	case NETDEV_UP:
1337 		if (!inetdev_valid_mtu(dev->mtu))
1338 			break;
1339 		if (dev->flags & IFF_LOOPBACK) {
1340 			struct in_ifaddr *ifa = inet_alloc_ifa();
1341 
1342 			if (ifa) {
1343 				INIT_HLIST_NODE(&ifa->hash);
1344 				ifa->ifa_local =
1345 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1346 				ifa->ifa_prefixlen = 8;
1347 				ifa->ifa_mask = inet_make_mask(8);
1348 				in_dev_hold(in_dev);
1349 				ifa->ifa_dev = in_dev;
1350 				ifa->ifa_scope = RT_SCOPE_HOST;
1351 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1352 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1353 						 INFINITY_LIFE_TIME);
1354 				inet_insert_ifa(ifa);
1355 			}
1356 		}
1357 		ip_mc_up(in_dev);
1358 		/* fall through */
1359 	case NETDEV_CHANGEADDR:
1360 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1361 			break;
1362 		/* fall through */
1363 	case NETDEV_NOTIFY_PEERS:
1364 		/* Send gratuitous ARP to notify of link change */
1365 		inetdev_send_gratuitous_arp(dev, in_dev);
1366 		break;
1367 	case NETDEV_DOWN:
1368 		ip_mc_down(in_dev);
1369 		break;
1370 	case NETDEV_PRE_TYPE_CHANGE:
1371 		ip_mc_unmap(in_dev);
1372 		break;
1373 	case NETDEV_POST_TYPE_CHANGE:
1374 		ip_mc_remap(in_dev);
1375 		break;
1376 	case NETDEV_CHANGEMTU:
1377 		if (inetdev_valid_mtu(dev->mtu))
1378 			break;
1379 		/* disable IP when MTU is not enough */
1380 	case NETDEV_UNREGISTER:
1381 		inetdev_destroy(in_dev);
1382 		break;
1383 	case NETDEV_CHANGENAME:
1384 		/* Do not notify about label change, this event is
1385 		 * not interesting to applications using netlink.
1386 		 */
1387 		inetdev_changename(dev, in_dev);
1388 
1389 		devinet_sysctl_unregister(in_dev);
1390 		devinet_sysctl_register(in_dev);
1391 		break;
1392 	}
1393 out:
1394 	return NOTIFY_DONE;
1395 }
1396 
1397 static struct notifier_block ip_netdev_notifier = {
1398 	.notifier_call = inetdev_event,
1399 };
1400 
1401 static size_t inet_nlmsg_size(void)
1402 {
1403 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1404 	       + nla_total_size(4) /* IFA_ADDRESS */
1405 	       + nla_total_size(4) /* IFA_LOCAL */
1406 	       + nla_total_size(4) /* IFA_BROADCAST */
1407 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1408 }
1409 
1410 static inline u32 cstamp_delta(unsigned long cstamp)
1411 {
1412 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1413 }
1414 
1415 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1416 			 unsigned long tstamp, u32 preferred, u32 valid)
1417 {
1418 	struct ifa_cacheinfo ci;
1419 
1420 	ci.cstamp = cstamp_delta(cstamp);
1421 	ci.tstamp = cstamp_delta(tstamp);
1422 	ci.ifa_prefered = preferred;
1423 	ci.ifa_valid = valid;
1424 
1425 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1426 }
1427 
1428 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1429 			    u32 portid, u32 seq, int event, unsigned int flags)
1430 {
1431 	struct ifaddrmsg *ifm;
1432 	struct nlmsghdr  *nlh;
1433 	u32 preferred, valid;
1434 
1435 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1436 	if (nlh == NULL)
1437 		return -EMSGSIZE;
1438 
1439 	ifm = nlmsg_data(nlh);
1440 	ifm->ifa_family = AF_INET;
1441 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1442 	ifm->ifa_flags = ifa->ifa_flags;
1443 	ifm->ifa_scope = ifa->ifa_scope;
1444 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1445 
1446 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1447 		preferred = ifa->ifa_preferred_lft;
1448 		valid = ifa->ifa_valid_lft;
1449 		if (preferred != INFINITY_LIFE_TIME) {
1450 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1451 
1452 			if (preferred > tval)
1453 				preferred -= tval;
1454 			else
1455 				preferred = 0;
1456 			if (valid != INFINITY_LIFE_TIME) {
1457 				if (valid > tval)
1458 					valid -= tval;
1459 				else
1460 					valid = 0;
1461 			}
1462 		}
1463 	} else {
1464 		preferred = INFINITY_LIFE_TIME;
1465 		valid = INFINITY_LIFE_TIME;
1466 	}
1467 	if ((ifa->ifa_address &&
1468 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1469 	    (ifa->ifa_local &&
1470 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1471 	    (ifa->ifa_broadcast &&
1472 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1473 	    (ifa->ifa_label[0] &&
1474 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1475 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1476 			  preferred, valid))
1477 		goto nla_put_failure;
1478 
1479 	return nlmsg_end(skb, nlh);
1480 
1481 nla_put_failure:
1482 	nlmsg_cancel(skb, nlh);
1483 	return -EMSGSIZE;
1484 }
1485 
1486 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1487 {
1488 	struct net *net = sock_net(skb->sk);
1489 	int h, s_h;
1490 	int idx, s_idx;
1491 	int ip_idx, s_ip_idx;
1492 	struct net_device *dev;
1493 	struct in_device *in_dev;
1494 	struct in_ifaddr *ifa;
1495 	struct hlist_head *head;
1496 	struct hlist_node *node;
1497 
1498 	s_h = cb->args[0];
1499 	s_idx = idx = cb->args[1];
1500 	s_ip_idx = ip_idx = cb->args[2];
1501 
1502 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1503 		idx = 0;
1504 		head = &net->dev_index_head[h];
1505 		rcu_read_lock();
1506 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1507 			if (idx < s_idx)
1508 				goto cont;
1509 			if (h > s_h || idx > s_idx)
1510 				s_ip_idx = 0;
1511 			in_dev = __in_dev_get_rcu(dev);
1512 			if (!in_dev)
1513 				goto cont;
1514 
1515 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1516 			     ifa = ifa->ifa_next, ip_idx++) {
1517 				if (ip_idx < s_ip_idx)
1518 					continue;
1519 				if (inet_fill_ifaddr(skb, ifa,
1520 					     NETLINK_CB(cb->skb).portid,
1521 					     cb->nlh->nlmsg_seq,
1522 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1523 					rcu_read_unlock();
1524 					goto done;
1525 				}
1526 			}
1527 cont:
1528 			idx++;
1529 		}
1530 		rcu_read_unlock();
1531 	}
1532 
1533 done:
1534 	cb->args[0] = h;
1535 	cb->args[1] = idx;
1536 	cb->args[2] = ip_idx;
1537 
1538 	return skb->len;
1539 }
1540 
1541 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1542 		      u32 portid)
1543 {
1544 	struct sk_buff *skb;
1545 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1546 	int err = -ENOBUFS;
1547 	struct net *net;
1548 
1549 	net = dev_net(ifa->ifa_dev->dev);
1550 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1551 	if (skb == NULL)
1552 		goto errout;
1553 
1554 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1555 	if (err < 0) {
1556 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1557 		WARN_ON(err == -EMSGSIZE);
1558 		kfree_skb(skb);
1559 		goto errout;
1560 	}
1561 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1562 	return;
1563 errout:
1564 	if (err < 0)
1565 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1566 }
1567 
1568 static size_t inet_get_link_af_size(const struct net_device *dev)
1569 {
1570 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1571 
1572 	if (!in_dev)
1573 		return 0;
1574 
1575 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1576 }
1577 
1578 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1579 {
1580 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1581 	struct nlattr *nla;
1582 	int i;
1583 
1584 	if (!in_dev)
1585 		return -ENODATA;
1586 
1587 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1588 	if (nla == NULL)
1589 		return -EMSGSIZE;
1590 
1591 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1592 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1593 
1594 	return 0;
1595 }
1596 
1597 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1598 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1599 };
1600 
1601 static int inet_validate_link_af(const struct net_device *dev,
1602 				 const struct nlattr *nla)
1603 {
1604 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1605 	int err, rem;
1606 
1607 	if (dev && !__in_dev_get_rtnl(dev))
1608 		return -EAFNOSUPPORT;
1609 
1610 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1611 	if (err < 0)
1612 		return err;
1613 
1614 	if (tb[IFLA_INET_CONF]) {
1615 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1616 			int cfgid = nla_type(a);
1617 
1618 			if (nla_len(a) < 4)
1619 				return -EINVAL;
1620 
1621 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1622 				return -EINVAL;
1623 		}
1624 	}
1625 
1626 	return 0;
1627 }
1628 
1629 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1630 {
1631 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1632 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1633 	int rem;
1634 
1635 	if (!in_dev)
1636 		return -EAFNOSUPPORT;
1637 
1638 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1639 		BUG();
1640 
1641 	if (tb[IFLA_INET_CONF]) {
1642 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1643 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1644 	}
1645 
1646 	return 0;
1647 }
1648 
1649 static int inet_netconf_msgsize_devconf(int type)
1650 {
1651 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1652 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1653 
1654 	/* type -1 is used for ALL */
1655 	if (type == -1 || type == NETCONFA_FORWARDING)
1656 		size += nla_total_size(4);
1657 	if (type == -1 || type == NETCONFA_RP_FILTER)
1658 		size += nla_total_size(4);
1659 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1660 		size += nla_total_size(4);
1661 
1662 	return size;
1663 }
1664 
1665 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1666 				     struct ipv4_devconf *devconf, u32 portid,
1667 				     u32 seq, int event, unsigned int flags,
1668 				     int type)
1669 {
1670 	struct nlmsghdr  *nlh;
1671 	struct netconfmsg *ncm;
1672 
1673 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1674 			flags);
1675 	if (nlh == NULL)
1676 		return -EMSGSIZE;
1677 
1678 	ncm = nlmsg_data(nlh);
1679 	ncm->ncm_family = AF_INET;
1680 
1681 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1682 		goto nla_put_failure;
1683 
1684 	/* type -1 is used for ALL */
1685 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1686 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1687 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1688 		goto nla_put_failure;
1689 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1690 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1691 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1692 		goto nla_put_failure;
1693 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1694 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1695 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1696 		goto nla_put_failure;
1697 
1698 	return nlmsg_end(skb, nlh);
1699 
1700 nla_put_failure:
1701 	nlmsg_cancel(skb, nlh);
1702 	return -EMSGSIZE;
1703 }
1704 
1705 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1706 				 struct ipv4_devconf *devconf)
1707 {
1708 	struct sk_buff *skb;
1709 	int err = -ENOBUFS;
1710 
1711 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1712 	if (skb == NULL)
1713 		goto errout;
1714 
1715 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1716 					RTM_NEWNETCONF, 0, type);
1717 	if (err < 0) {
1718 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1719 		WARN_ON(err == -EMSGSIZE);
1720 		kfree_skb(skb);
1721 		goto errout;
1722 	}
1723 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1724 	return;
1725 errout:
1726 	if (err < 0)
1727 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1728 }
1729 
1730 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1731 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1732 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1733 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1734 };
1735 
1736 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1737 				    struct nlmsghdr *nlh,
1738 				    void *arg)
1739 {
1740 	struct net *net = sock_net(in_skb->sk);
1741 	struct nlattr *tb[NETCONFA_MAX+1];
1742 	struct netconfmsg *ncm;
1743 	struct sk_buff *skb;
1744 	struct ipv4_devconf *devconf;
1745 	struct in_device *in_dev;
1746 	struct net_device *dev;
1747 	int ifindex;
1748 	int err;
1749 
1750 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1751 			  devconf_ipv4_policy);
1752 	if (err < 0)
1753 		goto errout;
1754 
1755 	err = EINVAL;
1756 	if (!tb[NETCONFA_IFINDEX])
1757 		goto errout;
1758 
1759 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1760 	switch (ifindex) {
1761 	case NETCONFA_IFINDEX_ALL:
1762 		devconf = net->ipv4.devconf_all;
1763 		break;
1764 	case NETCONFA_IFINDEX_DEFAULT:
1765 		devconf = net->ipv4.devconf_dflt;
1766 		break;
1767 	default:
1768 		dev = __dev_get_by_index(net, ifindex);
1769 		if (dev == NULL)
1770 			goto errout;
1771 		in_dev = __in_dev_get_rtnl(dev);
1772 		if (in_dev == NULL)
1773 			goto errout;
1774 		devconf = &in_dev->cnf;
1775 		break;
1776 	}
1777 
1778 	err = -ENOBUFS;
1779 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1780 	if (skb == NULL)
1781 		goto errout;
1782 
1783 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1784 					NETLINK_CB(in_skb).portid,
1785 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1786 					-1);
1787 	if (err < 0) {
1788 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1789 		WARN_ON(err == -EMSGSIZE);
1790 		kfree_skb(skb);
1791 		goto errout;
1792 	}
1793 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1794 errout:
1795 	return err;
1796 }
1797 
1798 #ifdef CONFIG_SYSCTL
1799 
1800 static void devinet_copy_dflt_conf(struct net *net, int i)
1801 {
1802 	struct net_device *dev;
1803 
1804 	rcu_read_lock();
1805 	for_each_netdev_rcu(net, dev) {
1806 		struct in_device *in_dev;
1807 
1808 		in_dev = __in_dev_get_rcu(dev);
1809 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1810 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1811 	}
1812 	rcu_read_unlock();
1813 }
1814 
1815 /* called with RTNL locked */
1816 static void inet_forward_change(struct net *net)
1817 {
1818 	struct net_device *dev;
1819 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1820 
1821 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1822 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1823 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1824 				    NETCONFA_IFINDEX_ALL,
1825 				    net->ipv4.devconf_all);
1826 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1827 				    NETCONFA_IFINDEX_DEFAULT,
1828 				    net->ipv4.devconf_dflt);
1829 
1830 	for_each_netdev(net, dev) {
1831 		struct in_device *in_dev;
1832 		if (on)
1833 			dev_disable_lro(dev);
1834 		rcu_read_lock();
1835 		in_dev = __in_dev_get_rcu(dev);
1836 		if (in_dev) {
1837 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1838 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1839 						    dev->ifindex, &in_dev->cnf);
1840 		}
1841 		rcu_read_unlock();
1842 	}
1843 }
1844 
1845 static int devinet_conf_proc(ctl_table *ctl, int write,
1846 			     void __user *buffer,
1847 			     size_t *lenp, loff_t *ppos)
1848 {
1849 	int old_value = *(int *)ctl->data;
1850 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1851 	int new_value = *(int *)ctl->data;
1852 
1853 	if (write) {
1854 		struct ipv4_devconf *cnf = ctl->extra1;
1855 		struct net *net = ctl->extra2;
1856 		int i = (int *)ctl->data - cnf->data;
1857 
1858 		set_bit(i, cnf->state);
1859 
1860 		if (cnf == net->ipv4.devconf_dflt)
1861 			devinet_copy_dflt_conf(net, i);
1862 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1863 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1864 			if ((new_value == 0) && (old_value != 0))
1865 				rt_cache_flush(net);
1866 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1867 		    new_value != old_value) {
1868 			int ifindex;
1869 
1870 			if (cnf == net->ipv4.devconf_dflt)
1871 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1872 			else if (cnf == net->ipv4.devconf_all)
1873 				ifindex = NETCONFA_IFINDEX_ALL;
1874 			else {
1875 				struct in_device *idev =
1876 					container_of(cnf, struct in_device,
1877 						     cnf);
1878 				ifindex = idev->dev->ifindex;
1879 			}
1880 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1881 						    ifindex, cnf);
1882 		}
1883 	}
1884 
1885 	return ret;
1886 }
1887 
1888 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1889 				  void __user *buffer,
1890 				  size_t *lenp, loff_t *ppos)
1891 {
1892 	int *valp = ctl->data;
1893 	int val = *valp;
1894 	loff_t pos = *ppos;
1895 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1896 
1897 	if (write && *valp != val) {
1898 		struct net *net = ctl->extra2;
1899 
1900 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1901 			if (!rtnl_trylock()) {
1902 				/* Restore the original values before restarting */
1903 				*valp = val;
1904 				*ppos = pos;
1905 				return restart_syscall();
1906 			}
1907 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1908 				inet_forward_change(net);
1909 			} else {
1910 				struct ipv4_devconf *cnf = ctl->extra1;
1911 				struct in_device *idev =
1912 					container_of(cnf, struct in_device, cnf);
1913 				if (*valp)
1914 					dev_disable_lro(idev->dev);
1915 				inet_netconf_notify_devconf(net,
1916 							    NETCONFA_FORWARDING,
1917 							    idev->dev->ifindex,
1918 							    cnf);
1919 			}
1920 			rtnl_unlock();
1921 			rt_cache_flush(net);
1922 		} else
1923 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1924 						    NETCONFA_IFINDEX_DEFAULT,
1925 						    net->ipv4.devconf_dflt);
1926 	}
1927 
1928 	return ret;
1929 }
1930 
1931 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1932 				void __user *buffer,
1933 				size_t *lenp, loff_t *ppos)
1934 {
1935 	int *valp = ctl->data;
1936 	int val = *valp;
1937 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1938 	struct net *net = ctl->extra2;
1939 
1940 	if (write && *valp != val)
1941 		rt_cache_flush(net);
1942 
1943 	return ret;
1944 }
1945 
/*
 * Helpers for building the devinet sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY expands to one ctl_table initialiser whose data
 * pointer refers to option IPV4_DEVCONF_<attr> inside the global
 * ipv4_devconf block (option numbers start at 1, data[] at 0).
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write option handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only option handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write option with a dedicated handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write option whose change flushes the route cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1968 
1969 static struct devinet_sysctl_table {
1970 	struct ctl_table_header *sysctl_header;
1971 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1972 } devinet_sysctl = {
1973 	.devinet_vars = {
1974 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1975 					     devinet_sysctl_forward),
1976 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1977 
1978 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1979 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1980 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1981 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1982 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1983 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1984 					"accept_source_route"),
1985 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1986 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1987 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1988 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1989 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1990 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1991 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1992 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1993 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1994 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1995 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1996 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1997 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1998 
1999 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2000 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2001 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2002 					      "force_igmp_version"),
2003 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2004 					      "promote_secondaries"),
2005 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2006 					      "route_localnet"),
2007 	},
2008 };
2009 
2010 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2011 					struct ipv4_devconf *p)
2012 {
2013 	int i;
2014 	struct devinet_sysctl_table *t;
2015 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2016 
2017 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2018 	if (!t)
2019 		goto out;
2020 
2021 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2022 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2023 		t->devinet_vars[i].extra1 = p;
2024 		t->devinet_vars[i].extra2 = net;
2025 	}
2026 
2027 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2028 
2029 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2030 	if (!t->sysctl_header)
2031 		goto free;
2032 
2033 	p->sysctl = t;
2034 	return 0;
2035 
2036 free:
2037 	kfree(t);
2038 out:
2039 	return -ENOBUFS;
2040 }
2041 
2042 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2043 {
2044 	struct devinet_sysctl_table *t = cnf->sysctl;
2045 
2046 	if (t == NULL)
2047 		return;
2048 
2049 	cnf->sysctl = NULL;
2050 	unregister_net_sysctl_table(t->sysctl_header);
2051 	kfree(t);
2052 }
2053 
2054 static void devinet_sysctl_register(struct in_device *idev)
2055 {
2056 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2057 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2058 					&idev->cnf);
2059 }
2060 
2061 static void devinet_sysctl_unregister(struct in_device *idev)
2062 {
2063 	__devinet_sysctl_unregister(&idev->cnf);
2064 	neigh_sysctl_unregister(idev->arp_parms);
2065 }
2066 
2067 static struct ctl_table ctl_forward_entry[] = {
2068 	{
2069 		.procname	= "ip_forward",
2070 		.data		= &ipv4_devconf.data[
2071 					IPV4_DEVCONF_FORWARDING - 1],
2072 		.maxlen		= sizeof(int),
2073 		.mode		= 0644,
2074 		.proc_handler	= devinet_sysctl_forward,
2075 		.extra1		= &ipv4_devconf,
2076 		.extra2		= &init_net,
2077 	},
2078 	{ },
2079 };
2080 #endif
2081 
2082 static __net_init int devinet_init_net(struct net *net)
2083 {
2084 	int err;
2085 	struct ipv4_devconf *all, *dflt;
2086 #ifdef CONFIG_SYSCTL
2087 	struct ctl_table *tbl = ctl_forward_entry;
2088 	struct ctl_table_header *forw_hdr;
2089 #endif
2090 
2091 	err = -ENOMEM;
2092 	all = &ipv4_devconf;
2093 	dflt = &ipv4_devconf_dflt;
2094 
2095 	if (!net_eq(net, &init_net)) {
2096 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2097 		if (all == NULL)
2098 			goto err_alloc_all;
2099 
2100 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2101 		if (dflt == NULL)
2102 			goto err_alloc_dflt;
2103 
2104 #ifdef CONFIG_SYSCTL
2105 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2106 		if (tbl == NULL)
2107 			goto err_alloc_ctl;
2108 
2109 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2110 		tbl[0].extra1 = all;
2111 		tbl[0].extra2 = net;
2112 #endif
2113 	}
2114 
2115 #ifdef CONFIG_SYSCTL
2116 	err = __devinet_sysctl_register(net, "all", all);
2117 	if (err < 0)
2118 		goto err_reg_all;
2119 
2120 	err = __devinet_sysctl_register(net, "default", dflt);
2121 	if (err < 0)
2122 		goto err_reg_dflt;
2123 
2124 	err = -ENOMEM;
2125 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2126 	if (forw_hdr == NULL)
2127 		goto err_reg_ctl;
2128 	net->ipv4.forw_hdr = forw_hdr;
2129 #endif
2130 
2131 	net->ipv4.devconf_all = all;
2132 	net->ipv4.devconf_dflt = dflt;
2133 	return 0;
2134 
2135 #ifdef CONFIG_SYSCTL
2136 err_reg_ctl:
2137 	__devinet_sysctl_unregister(dflt);
2138 err_reg_dflt:
2139 	__devinet_sysctl_unregister(all);
2140 err_reg_all:
2141 	if (tbl != ctl_forward_entry)
2142 		kfree(tbl);
2143 err_alloc_ctl:
2144 #endif
2145 	if (dflt != &ipv4_devconf_dflt)
2146 		kfree(dflt);
2147 err_alloc_dflt:
2148 	if (all != &ipv4_devconf)
2149 		kfree(all);
2150 err_alloc_all:
2151 	return err;
2152 }
2153 
2154 static __net_exit void devinet_exit_net(struct net *net)
2155 {
2156 #ifdef CONFIG_SYSCTL
2157 	struct ctl_table *tbl;
2158 
2159 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2160 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2161 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2162 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2163 	kfree(tbl);
2164 #endif
2165 	kfree(net->ipv4.devconf_dflt);
2166 	kfree(net->ipv4.devconf_all);
2167 }
2168 
2169 static __net_initdata struct pernet_operations devinet_ops = {
2170 	.init = devinet_init_net,
2171 	.exit = devinet_exit_net,
2172 };
2173 
2174 static struct rtnl_af_ops inet_af_ops = {
2175 	.family		  = AF_INET,
2176 	.fill_link_af	  = inet_fill_link_af,
2177 	.get_link_af_size = inet_get_link_af_size,
2178 	.validate_link_af = inet_validate_link_af,
2179 	.set_link_af	  = inet_set_link_af,
2180 };
2181 
2182 void __init devinet_init(void)
2183 {
2184 	int i;
2185 
2186 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2187 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2188 
2189 	register_pernet_subsys(&devinet_ops);
2190 
2191 	register_gifconf(PF_INET, inet_gifconf);
2192 	register_netdevice_notifier(&ip_netdev_notifier);
2193 
2194 	schedule_delayed_work(&check_lifetime_work, 0);
2195 
2196 	rtnl_af_register(&inet_af_ops);
2197 
2198 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2199 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2200 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2201 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2202 		      NULL, NULL);
2203 }
2204 
2205