xref: /openbmc/linux/net/ipv4/devinet.c (revision 4e1a33b1)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
/* Template config copied into every new in_device's ->cnf ("all"
 * defaults): accept/send/secure redirects and shared media on, IGMP
 * unsolicited report intervals in milliseconds.  Unlisted entries
 * default to 0.
 */
68 static struct ipv4_devconf ipv4_devconf = {
69 	.data = {
70 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
75 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
76 	},
77 };
78 
/* Per-namespace "default" template; same knobs as ipv4_devconf plus
 * accept_source_route enabled.
 */
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
87 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
88 	},
89 };
90 
/* Shorthand for reading a knob from a net namespace's default devconf. */
91 #define IPV4_DEVCONF_DFLT(net, attr) \
92 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
93 
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ADDR
 * messages (used by nlmsg_parse() below).  Addresses are raw __be32
 * values, the label is bounded by IFNAMSIZ.
 */
94 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
95 	[IFA_LOCAL]     	= { .type = NLA_U32 },
96 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
97 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
98 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
99 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
100 	[IFA_FLAGS]		= { .type = NLA_U32 },
101 };
102 
/* Global hash table of all configured IPv4 addresses (256 buckets),
 * keyed on ifa_local; written under RTNL, read under RCU.
 */
103 #define IN4_ADDR_HSIZE_SHIFT	8
104 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
105 
106 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
107 
108 static u32 inet_addr_hash(const struct net *net, __be32 addr)
109 {
110 	u32 val = (__force u32) addr ^ net_hash_mix(net);
111 
112 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
113 }
114 
/* Insert @ifa into the global address hash (RCU-safe publish).
 * Caller must hold RTNL; concurrent readers may already see the
 * entry once added.
 */
115 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
116 {
117 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
118 
119 	ASSERT_RTNL();
120 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
121 }
122 
/* Unlink @ifa from the global address hash under RTNL.  The node is
 * re-initialized so a later del is harmless; freeing must still wait
 * for an RCU grace period (see inet_free_ifa()).
 */
123 static void inet_hash_remove(struct in_ifaddr *ifa)
124 {
125 	ASSERT_RTNL();
126 	hlist_del_init_rcu(&ifa->hash);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
	/* Fast path: exact ifa_local match in the global address hash.
	 * The hash is shared between namespaces, so filter on net.
	 */
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fallback to FIB local table so that communication
160 		 * over loopback subnets work.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
	/* Take the reference while still inside the RCU section so the
	 * device cannot be freed between lookup and dev_hold().
	 */
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174 
/* Forward declarations and the notifier chain fired on address
 * add/remove (NETDEV_UP / NETDEV_DOWN events carry the in_ifaddr).
 */
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176 
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179 			 int destroy);
180 #ifdef CONFIG_SYSCTL
181 static int devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
/* No-op stubs when sysctl support is compiled out. */
184 static int devinet_sysctl_register(struct in_device *idev)
185 {
186 	return 0;
187 }
188 static void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192 
193 /* Locks all the inet devices. */
194 
/* Allocate a zeroed in_ifaddr; may sleep (GFP_KERNEL).  Returns NULL
 * on allocation failure.
 */
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199 
/* RCU callback: drop the reference the ifa held on its in_device
 * (if it was ever attached), then free the ifa itself.
 */
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 	if (ifa->ifa_dev)
204 		in_dev_put(ifa->ifa_dev);
205 	kfree(ifa);
206 }
207 
/* Defer freeing of @ifa until after an RCU grace period, since RCU
 * readers (e.g. __ip_dev_find()) may still hold a pointer to it.
 */
208 static void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212 
/* Called when the in_device refcount reaches zero: release the
 * multicast hash and the device reference, then free the structure.
 * An in_device that is not marked dead at this point indicates a
 * refcounting bug, so it is reported and leaked rather than freed.
 */
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215 	struct net_device *dev = idev->dev;
216 
217 	WARN_ON(idev->ifa_list);
218 	WARN_ON(idev->mc_list);
219 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
220 #ifdef NET_REFCNT_DEBUG
221 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
222 #endif
223 	dev_put(dev);
224 	if (!idev->dead)
225 		pr_err("Freeing alive in_device %p\n", idev);
226 	else
227 		kfree(idev);
228 }
229 EXPORT_SYMBOL(in_dev_finish_destroy);
230 
/* Create and attach the IPv4 in_device for @dev: copy the namespace
 * default devconf, allocate ARP parameters, register sysctls and
 * init multicast state.  Publishing dev->ip_ptr is done last so RX
 * paths only see a fully initialized structure.  Returns the new
 * in_device or an ERR_PTR on failure.  Caller must hold RTNL.
 */
231 static struct in_device *inetdev_init(struct net_device *dev)
232 {
233 	struct in_device *in_dev;
234 	int err = -ENOMEM;
235 
236 	ASSERT_RTNL();
237 
238 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 	if (!in_dev)
240 		goto out;
241 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242 			sizeof(in_dev->cnf));
243 	in_dev->cnf.sysctl = NULL;
244 	in_dev->dev = dev;
245 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246 	if (!in_dev->arp_parms)
247 		goto out_kfree;
	/* LRO is incompatible with forwarding, which the inherited
	 * defaults may already have enabled.
	 */
248 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249 		dev_disable_lro(dev);
250 	/* Reference in_dev->dev */
251 	dev_hold(dev);
252 	/* Account for reference dev->ip_ptr (below) */
253 	in_dev_hold(in_dev);
254 
255 	err = devinet_sysctl_register(in_dev);
256 	if (err) {
		/* Mark dead and drop our ref; teardown continues via
		 * in_dev_finish_destroy() when the count hits zero.
		 */
257 		in_dev->dead = 1;
258 		in_dev_put(in_dev);
259 		in_dev = NULL;
260 		goto out;
261 	}
262 	ip_mc_init_dev(in_dev);
263 	if (dev->flags & IFF_UP)
264 		ip_mc_up(in_dev);
265 
266 	/* we can receive as soon as ip_ptr is set -- do this last */
267 	rcu_assign_pointer(dev->ip_ptr, in_dev);
268 out:
269 	return in_dev ?: ERR_PTR(err);
270 out_kfree:
271 	kfree(in_dev);
272 	in_dev = NULL;
273 	goto out;
274 }
275 
/* RCU callback used by inetdev_destroy(): drop the final reference
 * once no reader can still dereference dev->ip_ptr.
 */
276 static void in_dev_rcu_put(struct rcu_head *head)
277 {
278 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
279 	in_dev_put(idev);
280 }
281 
/* Tear down the in_device when its net_device goes away: mark it
 * dead, destroy multicast state, delete every remaining address,
 * unpublish dev->ip_ptr and defer the final put past an RCU grace
 * period.  Caller must hold RTNL.
 */
282 static void inetdev_destroy(struct in_device *in_dev)
283 {
284 	struct in_ifaddr *ifa;
285 	struct net_device *dev;
286 
287 	ASSERT_RTNL();
288 
289 	dev = in_dev->dev;
290 
291 	in_dev->dead = 1;
292 
293 	ip_mc_destroy_dev(in_dev);
294 
	/* Pop addresses one at a time; dead==1 makes inet_del_ifa()
	 * skip secondary promotion (see __inet_del_ifa()).
	 */
295 	while ((ifa = in_dev->ifa_list) != NULL) {
296 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
297 		inet_free_ifa(ifa);
298 	}
299 
300 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
301 
302 	devinet_sysctl_unregister(in_dev);
303 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
304 	arp_ifdown(dev);
305 
306 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
307 }
308 
/* Return 1 if address @a (and @b, when non-zero) falls within the
 * subnet of any primary address on @in_dev, else 0.  Walks primary
 * addresses only, under RCU.
 */
309 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
310 {
311 	rcu_read_lock();
312 	for_primary_ifa(in_dev) {
313 		if (inet_ifa_match(a, ifa)) {
314 			if (!b || inet_ifa_match(b, ifa)) {
315 				rcu_read_unlock();
316 				return 1;
317 			}
318 		}
319 	} endfor_ifa(in_dev);
320 	rcu_read_unlock();
321 	return 0;
322 }
323 
/* Remove the address *@ifap from @in_dev.  Deleting a primary
 * address also deletes its secondaries, unless promote_secondaries
 * is enabled, in which case the first matching secondary is promoted
 * to primary and the remaining ones get their routes re-added with
 * the new prefsrc.  Sends RTM_DELADDR (and RTM_NEWADDR for a
 * promoted address) attributed to @nlh/@portid, and fires the
 * inetaddr notifier chain.  When @destroy is set the ifa is freed
 * (RCU-deferred); otherwise the caller keeps ownership.
 * Caller must hold RTNL.
 */
324 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
325 			 int destroy, struct nlmsghdr *nlh, u32 portid)
326 {
327 	struct in_ifaddr *promote = NULL;
328 	struct in_ifaddr *ifa, *ifa1 = *ifap;
329 	struct in_ifaddr *last_prim = in_dev->ifa_list;
330 	struct in_ifaddr *prev_prom = NULL;
331 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
332 
333 	ASSERT_RTNL();
334 
	/* Device teardown: skip secondary handling entirely. */
335 	if (in_dev->dead)
336 		goto no_promotions;
337 
338 	/* 1. Deleting primary ifaddr forces deletion all secondaries
339 	 * unless alias promotion is set
340 	 **/
341 
342 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
343 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
344 
345 		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary with scope <= ifa1's:
			 * the promoted address is re-linked after it.
			 */
346 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
347 			    ifa1->ifa_scope <= ifa->ifa_scope)
348 				last_prim = ifa;
349 
350 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
351 			    ifa1->ifa_mask != ifa->ifa_mask ||
352 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
353 				ifap1 = &ifa->ifa_next;
354 				prev_prom = ifa;
355 				continue;
356 			}
357 
358 			if (!do_promote) {
359 				inet_hash_remove(ifa);
360 				*ifap1 = ifa->ifa_next;
361 
362 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
363 				blocking_notifier_call_chain(&inetaddr_chain,
364 						NETDEV_DOWN, ifa);
365 				inet_free_ifa(ifa);
366 			} else {
367 				promote = ifa;
368 				break;
369 			}
370 		}
371 	}
372 
373 	/* On promotion all secondaries from subnet are changing
374 	 * the primary IP, we must remove all their routes silently
375 	 * and later to add them back with new prefsrc. Do this
376 	 * while all addresses are on the device list.
377 	 */
378 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
379 		if (ifa1->ifa_mask == ifa->ifa_mask &&
380 		    inet_ifa_match(ifa1->ifa_address, ifa))
381 			fib_del_ifaddr(ifa, ifa1);
382 	}
383 
384 no_promotions:
385 	/* 2. Unlink it */
386 
387 	*ifap = ifa1->ifa_next;
388 	inet_hash_remove(ifa1);
389 
390 	/* 3. Announce address deletion */
391 
392 	/* Send message first, then call notifier.
393 	   At first sight, FIB update triggered by notifier
394 	   will refer to already deleted ifaddr, that could confuse
395 	   netlink listeners. It is not true: look, gated sees
396 	   that route deleted and if it still thinks that ifaddr
397 	   is valid, it will try to restore deleted routes... Grr.
398 	   So that, this order is correct.
399 	 */
400 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
401 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
402 
403 	if (promote) {
404 		struct in_ifaddr *next_sec = promote->ifa_next;
405 
		/* Move the promoted entry out of the secondary run and
		 * re-link it right after the last suitable primary.
		 */
406 		if (prev_prom) {
407 			prev_prom->ifa_next = promote->ifa_next;
408 			promote->ifa_next = last_prim->ifa_next;
409 			last_prim->ifa_next = promote;
410 		}
411 
412 		promote->ifa_flags &= ~IFA_F_SECONDARY;
413 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
414 		blocking_notifier_call_chain(&inetaddr_chain,
415 				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the
		 * subnet, now with the promoted address as prefsrc.
		 */
416 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
417 			if (ifa1->ifa_mask != ifa->ifa_mask ||
418 			    !inet_ifa_match(ifa1->ifa_address, ifa))
419 					continue;
420 			fib_add_ifaddr(ifa);
421 		}
422 
423 	}
424 	if (destroy)
425 		inet_free_ifa(ifa1);
426 }
427 
/* In-kernel address deletion: no originating netlink message, so the
 * RTM_DELADDR broadcast carries no requester portid.
 */
428 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
429 			 int destroy)
430 {
431 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
432 }
433 
/* Deferred work that expires address valid/preferred lifetimes. */
434 static void check_lifetime(struct work_struct *work);
435 
436 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
437 
/* Insert @ifa into its in_device's address list and the global hash.
 * An address whose subnet already has a primary is demoted to
 * IFA_F_SECONDARY and appended; a new primary is linked after the
 * last primary of equal-or-wider scope.  Consumes @ifa on every
 * error path (via inet_free_ifa()).  Returns 0, -EEXIST if the exact
 * local address already exists, or -EINVAL on scope mismatch within
 * the subnet.  Caller must hold RTNL.
 */
438 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
439 			     u32 portid)
440 {
441 	struct in_device *in_dev = ifa->ifa_dev;
442 	struct in_ifaddr *ifa1, **ifap, **last_primary;
443 
444 	ASSERT_RTNL();
445 
	/* An address of 0 is silently discarded, not an error. */
446 	if (!ifa->ifa_local) {
447 		inet_free_ifa(ifa);
448 		return 0;
449 	}
450 
451 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
452 	last_primary = &in_dev->ifa_list;
453 
454 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
455 	     ifap = &ifa1->ifa_next) {
456 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
457 		    ifa->ifa_scope <= ifa1->ifa_scope)
458 			last_primary = &ifa1->ifa_next;
459 		if (ifa1->ifa_mask == ifa->ifa_mask &&
460 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
461 			if (ifa1->ifa_local == ifa->ifa_local) {
462 				inet_free_ifa(ifa);
463 				return -EEXIST;
464 			}
465 			if (ifa1->ifa_scope != ifa->ifa_scope) {
466 				inet_free_ifa(ifa);
467 				return -EINVAL;
468 			}
469 			ifa->ifa_flags |= IFA_F_SECONDARY;
470 		}
471 	}
472 
473 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		/* Feed the new primary address into the PRNG pool. */
474 		prandom_seed((__force u32) ifa->ifa_local);
475 		ifap = last_primary;
476 	}
477 
478 	ifa->ifa_next = *ifap;
479 	*ifap = ifa;
480 
481 	inet_hash_insert(dev_net(in_dev->dev), ifa);
482 
	/* Re-arm lifetime expiry so the new address is considered
	 * immediately.
	 */
483 	cancel_delayed_work(&check_lifetime_work);
484 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
485 
486 	/* Send message first, then call notifier.
487 	   Notifier will trigger FIB update, so that
488 	   listeners of netlink will know about new ifaddr */
489 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
490 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
491 
492 	return 0;
493 }
494 
/* In-kernel address insertion: no originating netlink request, so
 * the RTM_NEWADDR broadcast carries no requester portid.
 */
495 static int inet_insert_ifa(struct in_ifaddr *ifa)
496 {
497 	return __inet_insert_ifa(ifa, NULL, 0);
498 }
499 
/* Attach @ifa to @dev's in_device (taking a reference on it) and
 * insert it.  Loopback addresses are forced to host scope.  Consumes
 * @ifa on failure; returns -ENOBUFS if the device has no in_device.
 * Caller must hold RTNL.
 */
500 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
501 {
502 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
503 
504 	ASSERT_RTNL();
505 
506 	if (!in_dev) {
507 		inet_free_ifa(ifa);
508 		return -ENOBUFS;
509 	}
	/* First explicit configuration: pin the current devconf and
	 * ARP parameter state.
	 */
510 	ipv4_devconf_setall(in_dev);
511 	neigh_parms_data_state_setall(in_dev->arp_parms);
512 	if (ifa->ifa_dev != in_dev) {
513 		WARN_ON(ifa->ifa_dev);
514 		in_dev_hold(in_dev);
515 		ifa->ifa_dev = in_dev;
516 	}
517 	if (ipv4_is_loopback(ifa->ifa_local))
518 		ifa->ifa_scope = RT_SCOPE_HOST;
519 	return inet_insert_ifa(ifa);
520 }
521 
522 /* Caller must hold RCU or RTNL :
523  * We dont take a reference on found in_device
524  */
525 struct in_device *inetdev_by_index(struct net *net, int ifindex)
526 {
527 	struct net_device *dev;
528 	struct in_device *in_dev = NULL;
529 
530 	rcu_read_lock();
531 	dev = dev_get_by_index_rcu(net, ifindex);
532 	if (dev)
		/* _rtnl variant: valid under either RCU or RTNL. */
533 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
534 	rcu_read_unlock();
535 	return in_dev;
536 }
537 EXPORT_SYMBOL(inetdev_by_index);
538 
539 /* Called only from RTNL semaphored context. No locks. */
540 
/* Find the primary address on @in_dev whose mask equals @mask and
 * whose subnet contains @prefix; NULL if none.
 */
541 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
542 				    __be32 mask)
543 {
544 	ASSERT_RTNL();
545 
546 	for_primary_ifa(in_dev) {
547 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
548 			return ifa;
549 	} endfor_ifa(in_dev);
550 	return NULL;
551 }
552 
/* Join (@join true) or leave the multicast group given by
 * @ifa->ifa_address on the ifa's interface, using the per-namespace
 * autojoin socket @sk.  Returns the ip_mc_{join,leave}_group result.
 * Caller must hold RTNL (required by the group join/leave paths).
 */
553 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
554 {
555 	struct ip_mreqn mreq = {
556 		.imr_multiaddr.s_addr = ifa->ifa_address,
557 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
558 	};
559 	int ret;
560 
561 	ASSERT_RTNL();
562 
563 	lock_sock(sk);
564 	if (join)
565 		ret = ip_mc_join_group(sk, &mreq);
566 	else
567 		ret = ip_mc_leave_group(sk, &mreq);
568 	release_sock(sk);
569 
570 	return ret;
571 }
572 
/* RTM_DELADDR handler: parse the request, locate the matching
 * address on the named interface (by local address, label and/or
 * prefix, whichever attributes were supplied) and delete it.
 * Autojoined multicast groups are left first.  Returns 0 on success,
 * -ENODEV for an unknown interface, -EADDRNOTAVAIL when no address
 * matches, or a parse error.  Runs under RTNL.
 */
573 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
574 {
575 	struct net *net = sock_net(skb->sk);
576 	struct nlattr *tb[IFA_MAX+1];
577 	struct in_device *in_dev;
578 	struct ifaddrmsg *ifm;
579 	struct in_ifaddr *ifa, **ifap;
580 	int err = -EINVAL;
581 
582 	ASSERT_RTNL();
583 
584 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585 	if (err < 0)
586 		goto errout;
587 
588 	ifm = nlmsg_data(nlh);
589 	in_dev = inetdev_by_index(net, ifm->ifa_index);
590 	if (!in_dev) {
591 		err = -ENODEV;
592 		goto errout;
593 	}
594 
	/* Each supplied attribute narrows the match; absent ones are
	 * wildcards.
	 */
595 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
596 	     ifap = &ifa->ifa_next) {
597 		if (tb[IFA_LOCAL] &&
598 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
599 			continue;
600 
601 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
602 			continue;
603 
604 		if (tb[IFA_ADDRESS] &&
605 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
606 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
607 			continue;
608 
609 		if (ipv4_is_multicast(ifa->ifa_address))
610 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
611 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
612 		return 0;
613 	}
614 
615 	err = -EADDRNOTAVAIL;
616 errout:
617 	return err;
618 }
619 
620 #define INFINITY_LIFE_TIME	0xFFFFFFFF
621 
/* Periodic worker that ages configured addresses: deletes those past
 * their valid lifetime and flags those past their preferred lifetime
 * as IFA_F_DEPRECATED.  Each hash bucket is first scanned under RCU
 * to see whether any change is needed at all; only then is RTNL
 * taken for a second, mutating pass (so an address may have changed
 * between passes — both passes re-check the conditions).  Finally
 * the work is re-queued for the earliest upcoming expiry.
 */
622 static void check_lifetime(struct work_struct *work)
623 {
624 	unsigned long now, next, next_sec, next_sched;
625 	struct in_ifaddr *ifa;
626 	struct hlist_node *n;
627 	int i;
628 
629 	now = jiffies;
630 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
631 
632 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
633 		bool change_needed = false;
634 
		/* Pass 1 (RCU, read-only): decide if this bucket needs
		 * work and compute the next wakeup time.
		 */
635 		rcu_read_lock();
636 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
637 			unsigned long age;
638 
639 			if (ifa->ifa_flags & IFA_F_PERMANENT)
640 				continue;
641 
642 			/* We try to batch several events at once. */
643 			age = (now - ifa->ifa_tstamp +
644 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645 
646 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647 			    age >= ifa->ifa_valid_lft) {
648 				change_needed = true;
649 			} else if (ifa->ifa_preferred_lft ==
650 				   INFINITY_LIFE_TIME) {
651 				continue;
652 			} else if (age >= ifa->ifa_preferred_lft) {
653 				if (time_before(ifa->ifa_tstamp +
654 						ifa->ifa_valid_lft * HZ, next))
655 					next = ifa->ifa_tstamp +
656 					       ifa->ifa_valid_lft * HZ;
657 
658 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
659 					change_needed = true;
660 			} else if (time_before(ifa->ifa_tstamp +
661 					       ifa->ifa_preferred_lft * HZ,
662 					       next)) {
663 				next = ifa->ifa_tstamp +
664 				       ifa->ifa_preferred_lft * HZ;
665 			}
666 		}
667 		rcu_read_unlock();
668 		if (!change_needed)
669 			continue;
		/* Pass 2 (RTNL): actually delete/deprecate. */
670 		rtnl_lock();
671 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
672 			unsigned long age;
673 
674 			if (ifa->ifa_flags & IFA_F_PERMANENT)
675 				continue;
676 
677 			/* We try to batch several events at once. */
678 			age = (now - ifa->ifa_tstamp +
679 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
680 
681 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
682 			    age >= ifa->ifa_valid_lft) {
683 				struct in_ifaddr **ifap;
684 
				/* inet_del_ifa() needs the list slot
				 * pointing at ifa, so re-find it on
				 * the device list.
				 */
685 				for (ifap = &ifa->ifa_dev->ifa_list;
686 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
687 					if (*ifap == ifa) {
688 						inet_del_ifa(ifa->ifa_dev,
689 							     ifap, 1);
690 						break;
691 					}
692 				}
693 			} else if (ifa->ifa_preferred_lft !=
694 				   INFINITY_LIFE_TIME &&
695 				   age >= ifa->ifa_preferred_lft &&
696 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
697 				ifa->ifa_flags |= IFA_F_DEPRECATED;
698 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
699 			}
700 		}
701 		rtnl_unlock();
702 	}
703 
704 	next_sec = round_jiffies_up(next);
705 	next_sched = next;
706 
707 	/* If rounded timeout is accurate enough, accept it. */
708 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
709 		next_sched = next_sec;
710 
711 	now = jiffies;
712 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
713 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
714 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
715 
716 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
717 			next_sched - now);
718 }
719 
/* Apply valid/preferred lifetimes (in seconds; INFINITY_LIFE_TIME
 * means forever) to @ifa.  An infinite valid lifetime makes the
 * address IFA_F_PERMANENT; a preferred lifetime of 0 marks it
 * deprecated immediately.  Also stamps tstamp (and cstamp on first
 * use), which check_lifetime() ages against.
 */
720 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
721 			     __u32 prefered_lft)
722 {
723 	unsigned long timeout;
724 
725 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
726 
727 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
728 	if (addrconf_finite_timeout(timeout))
729 		ifa->ifa_valid_lft = timeout;
730 	else
731 		ifa->ifa_flags |= IFA_F_PERMANENT;
732 
733 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
734 	if (addrconf_finite_timeout(timeout)) {
735 		if (timeout == 0)
736 			ifa->ifa_flags |= IFA_F_DEPRECATED;
737 		ifa->ifa_preferred_lft = timeout;
738 	}
739 	ifa->ifa_tstamp = jiffies;
740 	if (!ifa->ifa_cstamp)
741 		ifa->ifa_cstamp = ifa->ifa_tstamp;
742 }
743 
/* Build an in_ifaddr from an RTM_NEWADDR request.  Validates the
 * prefix length and required IFA_LOCAL attribute, resolves the
 * target device, and fills the ifa from the message attributes
 * (IFA_ADDRESS defaults to IFA_LOCAL, the label defaults to the
 * device name).  Lifetimes from IFA_CACHEINFO are returned through
 * @pvalid_lft/@pprefered_lft (left untouched when absent).  Returns
 * the new ifa — holding a reference on its in_device — or an
 * ERR_PTR.  Caller must hold RTNL.
 */
744 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
745 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
746 {
747 	struct nlattr *tb[IFA_MAX+1];
748 	struct in_ifaddr *ifa;
749 	struct ifaddrmsg *ifm;
750 	struct net_device *dev;
751 	struct in_device *in_dev;
752 	int err;
753 
754 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
755 	if (err < 0)
756 		goto errout;
757 
758 	ifm = nlmsg_data(nlh);
759 	err = -EINVAL;
760 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
761 		goto errout;
762 
763 	dev = __dev_get_by_index(net, ifm->ifa_index);
764 	err = -ENODEV;
765 	if (!dev)
766 		goto errout;
767 
768 	in_dev = __in_dev_get_rtnl(dev);
769 	err = -ENOBUFS;
770 	if (!in_dev)
771 		goto errout;
772 
773 	ifa = inet_alloc_ifa();
774 	if (!ifa)
775 		/*
776 		 * A potential indev allocation can be left alive, it stays
777 		 * assigned to its device and is destroy with it.
778 		 */
779 		goto errout;
780 
781 	ipv4_devconf_setall(in_dev);
782 	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference dropped later by inet_rcu_free_ifa() via ifa_dev. */
783 	in_dev_hold(in_dev);
784 
785 	if (!tb[IFA_ADDRESS])
786 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
787 
788 	INIT_HLIST_NODE(&ifa->hash);
789 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
790 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* IFA_FLAGS (32-bit) supersedes the 8-bit header field. */
791 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
792 					 ifm->ifa_flags;
793 	ifa->ifa_scope = ifm->ifa_scope;
794 	ifa->ifa_dev = in_dev;
795 
796 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
797 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
798 
799 	if (tb[IFA_BROADCAST])
800 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
801 
802 	if (tb[IFA_LABEL])
803 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
804 	else
805 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
806 
807 	if (tb[IFA_CACHEINFO]) {
808 		struct ifa_cacheinfo *ci;
809 
810 		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Preferred lifetime must not exceed valid lifetime. */
811 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
812 			err = -EINVAL;
813 			goto errout_free;
814 		}
815 		*pvalid_lft = ci->ifa_valid;
816 		*pprefered_lft = ci->ifa_prefered;
817 	}
818 
819 	return ifa;
820 
821 errout_free:
822 	inet_free_ifa(ifa);
823 errout:
824 	return ERR_PTR(err);
825 }
826 
827 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
828 {
829 	struct in_device *in_dev = ifa->ifa_dev;
830 	struct in_ifaddr *ifa1, **ifap;
831 
832 	if (!ifa->ifa_local)
833 		return NULL;
834 
835 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
836 	     ifap = &ifa1->ifa_next) {
837 		if (ifa1->ifa_mask == ifa->ifa_mask &&
838 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
839 		    ifa1->ifa_local == ifa->ifa_local)
840 			return ifa1;
841 	}
842 	return NULL;
843 }
844 
/* RTM_NEWADDR handler.  A brand-new address is inserted (joining its
 * multicast group first when IFA_F_MCAUTOJOIN is set).  If the same
 * address already exists, NLM_F_REPLACE updates its lifetimes and
 * re-announces it; otherwise the request fails with -EEXIST.
 * Runs under RTNL.
 */
845 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
846 {
847 	struct net *net = sock_net(skb->sk);
848 	struct in_ifaddr *ifa;
849 	struct in_ifaddr *ifa_existing;
850 	__u32 valid_lft = INFINITY_LIFE_TIME;
851 	__u32 prefered_lft = INFINITY_LIFE_TIME;
852 
853 	ASSERT_RTNL();
854 
855 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
856 	if (IS_ERR(ifa))
857 		return PTR_ERR(ifa);
858 
859 	ifa_existing = find_matching_ifa(ifa);
860 	if (!ifa_existing) {
861 		/* It would be best to check for !NLM_F_CREATE here but
862 		 * userspace already relies on not having to provide this.
863 		 */
864 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
865 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
866 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
867 					       true, ifa);
868 
869 			if (ret < 0) {
870 				inet_free_ifa(ifa);
871 				return ret;
872 			}
873 		}
874 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
875 	} else {
		/* Duplicate: the freshly built ifa is discarded and
		 * the existing entry updated in place.
		 */
876 		inet_free_ifa(ifa);
877 
878 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
879 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
880 			return -EEXIST;
881 		ifa = ifa_existing;
882 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
883 		cancel_delayed_work(&check_lifetime_work);
884 		queue_delayed_work(system_power_efficient_wq,
885 				&check_lifetime_work, 0);
886 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
887 	}
888 	return 0;
889 }
890 
891 /*
892  *	Determine a default network mask, based on the IP address.
893  */
894 
895 static int inet_abc_len(__be32 addr)
896 {
897 	int rc = -1;	/* Something else, probably a multicast. */
898 
899 	if (ipv4_is_zeronet(addr))
900 		rc = 0;
901 	else {
902 		__u32 haddr = ntohl(addr);
903 
904 		if (IN_CLASSA(haddr))
905 			rc = 8;
906 		else if (IN_CLASSB(haddr))
907 			rc = 16;
908 		else if (IN_CLASSC(haddr))
909 			rc = 24;
910 	}
911 
912 	return rc;
913 }
914 
915 
/* Legacy SIOC[GS]IF* ioctl interface for IPv4 addresses.  Copies the
 * ifreq from userspace, resolves the device (an "eth0:1"-style alias
 * label selects a specific in_ifaddr — first by label+address, then
 * by label alone), checks CAP_NET_ADMIN for the set operations, and
 * dispatches.  Get operations copy the result back to @arg; set
 * operations delete and re-insert the address so FIB/notifier state
 * stays consistent.  Returns 0 or a negative errno.
 */
916 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
917 {
918 	struct ifreq ifr;
919 	struct sockaddr_in sin_orig;
920 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
921 	struct in_device *in_dev;
922 	struct in_ifaddr **ifap = NULL;
923 	struct in_ifaddr *ifa = NULL;
924 	struct net_device *dev;
925 	char *colon;
926 	int ret = -EFAULT;
927 	int tryaddrmatch = 0;
928 
929 	/*
930 	 *	Fetch the caller's info block into kernel space
931 	 */
932 
933 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
934 		goto out;
935 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
936 
937 	/* save original address for comparison */
938 	memcpy(&sin_orig, sin, sizeof(*sin));
939 
	/* Strip an alias suffix ("eth0:1" -> "eth0") for the device
	 * lookup; restored after __dev_get_by_name() below.
	 */
940 	colon = strchr(ifr.ifr_name, ':');
941 	if (colon)
942 		*colon = 0;
943 
944 	dev_load(net, ifr.ifr_name);
945 
946 	switch (cmd) {
947 	case SIOCGIFADDR:	/* Get interface address */
948 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
949 	case SIOCGIFDSTADDR:	/* Get the destination address */
950 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
951 		/* Note that these ioctls will not sleep,
952 		   so that we do not impose a lock.
953 		   One day we will be forced to put shlock here (I mean SMP)
954 		 */
955 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
956 		memset(sin, 0, sizeof(*sin));
957 		sin->sin_family = AF_INET;
958 		break;
959 
960 	case SIOCSIFFLAGS:
961 		ret = -EPERM;
962 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
963 			goto out;
964 		break;
965 	case SIOCSIFADDR:	/* Set interface address (and family) */
966 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
967 	case SIOCSIFDSTADDR:	/* Set the destination address */
968 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
969 		ret = -EPERM;
970 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
971 			goto out;
972 		ret = -EINVAL;
973 		if (sin->sin_family != AF_INET)
974 			goto out;
975 		break;
976 	default:
977 		ret = -EINVAL;
978 		goto out;
979 	}
980 
981 	rtnl_lock();
982 
983 	ret = -ENODEV;
984 	dev = __dev_get_by_name(net, ifr.ifr_name);
985 	if (!dev)
986 		goto done;
987 
988 	if (colon)
989 		*colon = ':';
990 
991 	in_dev = __in_dev_get_rtnl(dev);
992 	if (in_dev) {
993 		if (tryaddrmatch) {
994 			/* Matthias Andree */
995 			/* compare label and address (4.4BSD style) */
996 			/* note: we only do this for a limited set of ioctls
997 			   and only if the original address family was AF_INET.
998 			   This is checked above. */
999 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1000 			     ifap = &ifa->ifa_next) {
1001 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1002 				    sin_orig.sin_addr.s_addr ==
1003 							ifa->ifa_local) {
1004 					break; /* found */
1005 				}
1006 			}
1007 		}
1008 		/* we didn't get a match, maybe the application is
1009 		   4.3BSD-style and passed in junk so we fall back to
1010 		   comparing just the label */
1011 		if (!ifa) {
1012 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1013 			     ifap = &ifa->ifa_next)
1014 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1015 					break;
1016 		}
1017 	}
1018 
	/* Only SIOCSIFADDR/SIOCSIFFLAGS can proceed without an
	 * existing address (the former creates one).
	 */
1019 	ret = -EADDRNOTAVAIL;
1020 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1021 		goto done;
1022 
1023 	switch (cmd) {
1024 	case SIOCGIFADDR:	/* Get interface address */
1025 		sin->sin_addr.s_addr = ifa->ifa_local;
1026 		goto rarok;
1027 
1028 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1029 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1030 		goto rarok;
1031 
1032 	case SIOCGIFDSTADDR:	/* Get the destination address */
1033 		sin->sin_addr.s_addr = ifa->ifa_address;
1034 		goto rarok;
1035 
1036 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1037 		sin->sin_addr.s_addr = ifa->ifa_mask;
1038 		goto rarok;
1039 
1040 	case SIOCSIFFLAGS:
1041 		if (colon) {
			/* Alias form: clearing IFF_UP deletes the
			 * alias address rather than downing the dev.
			 */
1042 			ret = -EADDRNOTAVAIL;
1043 			if (!ifa)
1044 				break;
1045 			ret = 0;
1046 			if (!(ifr.ifr_flags & IFF_UP))
1047 				inet_del_ifa(in_dev, ifap, 1);
1048 			break;
1049 		}
1050 		ret = dev_change_flags(dev, ifr.ifr_flags);
1051 		break;
1052 
1053 	case SIOCSIFADDR:	/* Set interface address (and family) */
1054 		ret = -EINVAL;
1055 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1056 			break;
1057 
1058 		if (!ifa) {
1059 			ret = -ENOBUFS;
1060 			ifa = inet_alloc_ifa();
1061 			if (!ifa)
1062 				break;
1063 			INIT_HLIST_NODE(&ifa->hash);
1064 			if (colon)
1065 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1066 			else
1067 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1068 		} else {
1069 			ret = 0;
1070 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1071 				break;
			/* Remove first (destroy=0 keeps the ifa) so
			 * it can be re-inserted with the new address.
			 */
1072 			inet_del_ifa(in_dev, ifap, 0);
1073 			ifa->ifa_broadcast = 0;
1074 			ifa->ifa_scope = 0;
1075 		}
1076 
1077 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1078 
1079 		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Derive classful prefix/mask/broadcast. */
1080 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1081 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1082 			if ((dev->flags & IFF_BROADCAST) &&
1083 			    ifa->ifa_prefixlen < 31)
1084 				ifa->ifa_broadcast = ifa->ifa_address |
1085 						     ~ifa->ifa_mask;
1086 		} else {
1087 			ifa->ifa_prefixlen = 32;
1088 			ifa->ifa_mask = inet_make_mask(32);
1089 		}
1090 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1091 		ret = inet_set_ifa(dev, ifa);
1092 		break;
1093 
1094 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1095 		ret = 0;
1096 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1097 			inet_del_ifa(in_dev, ifap, 0);
1098 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1099 			inet_insert_ifa(ifa);
1100 		}
1101 		break;
1102 
1103 	case SIOCSIFDSTADDR:	/* Set the destination address */
1104 		ret = 0;
1105 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1106 			break;
1107 		ret = -EINVAL;
1108 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1109 			break;
1110 		ret = 0;
1111 		inet_del_ifa(in_dev, ifap, 0);
1112 		ifa->ifa_address = sin->sin_addr.s_addr;
1113 		inet_insert_ifa(ifa);
1114 		break;
1115 
1116 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1117 
1118 		/*
1119 		 *	The mask we set must be legal.
1120 		 */
1121 		ret = -EINVAL;
1122 		if (bad_mask(sin->sin_addr.s_addr, 0))
1123 			break;
1124 		ret = 0;
1125 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1126 			__be32 old_mask = ifa->ifa_mask;
1127 			inet_del_ifa(in_dev, ifap, 0);
1128 			ifa->ifa_mask = sin->sin_addr.s_addr;
1129 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1130 
1131 			/* See if current broadcast address matches
1132 			 * with current netmask, then recalculate
1133 			 * the broadcast address. Otherwise it's a
1134 			 * funny address, so don't touch it since
1135 			 * the user seems to know what (s)he's doing...
1136 			 */
1137 			if ((dev->flags & IFF_BROADCAST) &&
1138 			    (ifa->ifa_prefixlen < 31) &&
1139 			    (ifa->ifa_broadcast ==
1140 			     (ifa->ifa_local|~old_mask))) {
1141 				ifa->ifa_broadcast = (ifa->ifa_local |
1142 						      ~sin->sin_addr.s_addr);
1143 			}
1144 			inet_insert_ifa(ifa);
1145 		}
1146 		break;
1147 	}
1148 done:
1149 	rtnl_unlock();
1150 out:
1151 	return ret;
1152 rarok:
1153 	rtnl_unlock();
1154 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1155 	goto out;
1156 }
1157 
/* Fill @buf with one struct ifreq (label plus local IPv4 address) per
 * address configured on @dev, writing at most @len bytes.  A NULL @buf
 * requests a size-only pass.  Returns the number of bytes produced
 * (or needed), or -EFAULT on a failed copy to userspace.
 */
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			/* Size-only pass: account for the entry, copy nothing. */
			done += sizeof(ifr);
			continue;
		}
		/* Stop (without error) once the user buffer is exhausted. */
		if (len < (int) sizeof(ifr))
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}
1193 
/* Pick a source address for talking to @dst within @scope, preferring
 * @dev.  Search order: a primary address on @dev whose subnet matches
 * @dst (else the first in-scope primary), then addresses on the VRF
 * master device if @dev is enslaved, then every device in the netns.
 * Returns 0 when nothing suitable exists.  Runs under RCU.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			/* Exact subnet match (or no dst constraint): take it. */
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1264 
/* Scan one in_device for an address confirming (@local, @dst, @scope):
 * @addr tracks a candidate local source address, @same records whether
 * some address on the device matched the local/dst wildcards.  Returns
 * the candidate only if a wildcard match was also seen, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			/* First acceptable local address becomes the candidate. */
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1301 
1302 /*
1303  * Confirm that local IP address exists using wildcards:
1304  * - net: netns to check, cannot be NULL
1305  * - in_dev: only on this interface, NULL=any interface
1306  * - dst: only in the same subnet as dst, 0=any dst
1307  * - local: address, 0=autoselect the local address
1308  * - scope: maximum allowed scope value for the local address
1309  */
1310 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1311 			 __be32 dst, __be32 local, int scope)
1312 {
1313 	__be32 addr = 0;
1314 	struct net_device *dev;
1315 
1316 	if (in_dev)
1317 		return confirm_addr_indev(in_dev, dst, local, scope);
1318 
1319 	rcu_read_lock();
1320 	for_each_netdev_rcu(net, dev) {
1321 		in_dev = __in_dev_get_rcu(dev);
1322 		if (in_dev) {
1323 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1324 			if (addr)
1325 				break;
1326 		}
1327 	}
1328 	rcu_read_unlock();
1329 
1330 	return addr;
1331 }
1332 EXPORT_SYMBOL(inet_confirm_addr);
1333 
1334 /*
1335  *	Device notifier
1336  */
1337 
/* Subscribe @nb to IPv4 address add/delete events (NETDEV_UP/DOWN
 * notifications on inetaddr_chain, a blocking notifier chain).
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1343 
/* Remove @nb from the IPv4 address notifier chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1349 
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* First address keeps the bare device name, no alias suffix. */
		if (named++ == 0)
			goto skip;
		dot = strchr(old, ':');
		if (!dot) {
			/* Old label had no alias suffix: synthesize ":<n>". */
			sprintf(old, ":%d", named);
			dot = old;
		}
		/* Append the alias suffix; if it would not fit, overwrite the
		 * tail of the new name so the suffix is always preserved.
		 */
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1378 
/* IPv4 can run on a device only if its MTU is at least 68 bytes, the
 * minimum datagram size every link must be able to carry (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1383 
/* Send one gratuitous ARP request (sender == target == ifa_local) for
 * every address on @in_dev, so neighbours refresh their caches after a
 * link or address change.
 */
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
	struct in_ifaddr *ifa;

	for (ifa = in_dev->ifa_list; ifa;
	     ifa = ifa->ifa_next) {
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
}
1398 
/* Called only under RTNL semaphore */

/* Netdevice notifier: keeps the per-device IPv4 state (in_device) in
 * sync with device lifetime events — creation on REGISTER, automatic
 * 127.0.0.1/8 on loopback UP, teardown on UNREGISTER or too-small MTU,
 * gratuitous ARP on address/link changes, label fixup on rename.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		/* No IPv4 state yet: only creation-type events matter. */
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* in_dev already present at REGISTER should be impossible. */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 on loopback. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register sysctls so they appear under the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1493 
/* Hooks inetdev_event() into the netdevice notifier chain. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1497 
/* Worst-case payload size of an RTM_NEWADDR/RTM_DELADDR message; must
 * cover every attribute inet_fill_ifaddr() can emit.
 */
static size_t inet_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(4) /* IFA_ADDRESS */
	       + nla_total_size(4) /* IFA_LOCAL */
	       + nla_total_size(4) /* IFA_BROADCAST */
	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
	       + nla_total_size(4)  /* IFA_FLAGS */
	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
1508 
/* Convert a jiffies timestamp to hundredths of a second since boot
 * (relative to INITIAL_JIFFIES), the unit used in struct ifa_cacheinfo.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
1513 
1514 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1515 			 unsigned long tstamp, u32 preferred, u32 valid)
1516 {
1517 	struct ifa_cacheinfo ci;
1518 
1519 	ci.cstamp = cstamp_delta(cstamp);
1520 	ci.tstamp = cstamp_delta(tstamp);
1521 	ci.ifa_prefered = preferred;
1522 	ci.ifa_valid = valid;
1523 
1524 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1525 }
1526 
/* Fill one RTM address message for @ifa into @skb: ifaddrmsg header
 * plus IFA_ADDRESS/LOCAL/BROADCAST/LABEL/FLAGS/CACHEINFO attributes.
 * Non-permanent addresses get remaining (not absolute) lifetimes.
 * Returns 0 or -EMSGSIZE (message rolled back) if @skb is too small.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		/* Report lifetimes remaining from now, clamped at zero. */
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Zero-valued addresses and an empty label are simply omitted. */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1586 
/* RTM_GETADDR dump: walk every device hash chain and emit one message
 * per address.  cb->args[0..2] hold the resume cursor (hash bucket,
 * device index within bucket, address index within device) so a
 * multi-part dump can continue where the previous skb filled up.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie lets userspace detect mid-dump changes. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Only the exact resume device keeps its address offset. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					/* skb full: save cursor and stop. */
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1643 
/* Broadcast an address change (@event is RTM_NEWADDR/RTM_DELADDR) for
 * @ifa to the RTNLGRP_IPV4_IFADDR multicast group; @nlh/@portid echo
 * the originating request when the change came via netlink.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	/* Record the failure so listeners see the lost notification. */
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1670 
/* Size needed for the IFLA_INET_CONF blob in an RTM_GETLINK reply;
 * 0 when the device has no IPv4 state.  @ext_filter_mask is part of
 * the rtnl_af_ops interface and unused here.
 */
static size_t inet_get_link_af_size(const struct net_device *dev,
				    u32 ext_filter_mask)
{
	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);

	if (!in_dev)
		return 0;

	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
}
1681 
/* Dump the device's entire ipv4 devconf array as one flat
 * IFLA_INET_CONF attribute (IPV4_DEVCONF_MAX u32 values).
 * Returns 0, -ENODATA without IPv4 state, or -EMSGSIZE.
 */
static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
			     u32 ext_filter_mask)
{
	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	struct nlattr *nla;
	int i;

	if (!in_dev)
		return -ENODATA;

	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
	if (!nla)
		return -EMSGSIZE;

	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];

	return 0;
}
1701 
/* Policy for the AF_INET portion of IFLA_AF_SPEC in link messages. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1705 
/* Validate a userspace IFLA_AF_SPEC(AF_INET) blob before it is applied
 * by inet_set_link_af(): every nested devconf entry must be at least
 * 4 bytes and carry a valid config id.  Returns 0 or a negative errno.
 */
static int inet_validate_link_af(const struct net_device *dev,
				 const struct nlattr *nla)
{
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int err, rem;

	/* A concrete device must already have IPv4 enabled. */
	if (dev && !__in_dev_get_rtnl(dev))
		return -EAFNOSUPPORT;

	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
	if (err < 0)
		return err;

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
			int cfgid = nla_type(a);

			if (nla_len(a) < 4)
				return -EINVAL;

			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
				return -EINVAL;
		}
	}

	return 0;
}
1733 
/* Apply a previously validated IFLA_AF_SPEC(AF_INET) blob: copy each
 * nested IFLA_INET_CONF value into the device's ipv4 devconf.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	/* Cannot fail: inet_validate_link_af() already parsed this blob. */
	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1753 
/* Payload size of an RTM_NEWNETCONF message carrying attribute @type
 * (or all attributes for NETCONFA_ALL); must stay in sync with
 * inet_netconf_fill_devconf().
 */
static int inet_netconf_msgsize_devconf(int type)
{
	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
	bool all = false;

	if (type == NETCONFA_ALL)
		all = true;

	if (all || type == NETCONFA_FORWARDING)
		size += nla_total_size(4);
	if (all || type == NETCONFA_RP_FILTER)
		size += nla_total_size(4);
	if (all || type == NETCONFA_MC_FORWARDING)
		size += nla_total_size(4);
	if (all || type == NETCONFA_PROXY_NEIGH)
		size += nla_total_size(4);
	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
		size += nla_total_size(4);

	return size;
}
1776 
/* Build an RTM_NEWNETCONF message for @devconf on @ifindex, including
 * only attribute @type (or all of them for NETCONFA_ALL).  Returns 0
 * or -EMSGSIZE with the partial message cancelled.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1828 
/* Broadcast a netconf change (attribute @type for @ifindex) to the
 * RTNLGRP_IPV4_NETCONF multicast group.
 */
void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
				 struct ipv4_devconf *devconf)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
					RTM_NEWNETCONF, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
1853 
/* Attribute policy for RTM_GETNETCONF requests. */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1861 
/* RTM_GETNETCONF handler: look up the devconf block named by
 * NETCONFA_IFINDEX (a real ifindex, or the ALL/DEFAULT pseudo
 * indexes) and unicast a full netconf dump back to the requester.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct netconfmsg *ncm;
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
			  devconf_ipv4_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
1922 
/* RTM_GETNETCONF dump: one netconf message per device, then one for
 * the "all" and one for the "default" pseudo entries (tracked by the
 * h cursor running past NETDEV_HASHENTRIES).  cb->args[0..1] hold the
 * resume position across multi-part dumps.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie lets userspace detect mid-dump changes. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* Pseudo entry for the netns-wide "all" configuration. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Pseudo entry for the "default" configuration template. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
1993 
1994 #ifdef CONFIG_SYSCTL
1995 
/* Propagate devconf option @i from the netns default template to every
 * device that has not overridden it locally (the per-device state bit
 * marks explicit writes, which are preserved).
 */
static void devinet_copy_dflt_conf(struct net *net, int i)
{
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct in_device *in_dev;

		in_dev = __in_dev_get_rcu(dev);
		if (in_dev && !test_bit(i, in_dev->cnf.state))
			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
	}
	rcu_read_unlock();
}
2010 
/* called with RTNL locked */

/* Apply a change of the "all" forwarding sysctl: mirror it into the
 * default template and every device, disable LRO when forwarding is
 * enabled, and emit a netconf notification for each config block.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* Routers must not accept redirects; hosts may. */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	for_each_netdev(net, dev) {
		struct in_device *in_dev;

		if (on)
			dev_disable_lro(dev);

		in_dev = __in_dev_get_rtnl(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
	}
}
2040 
2041 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2042 {
2043 	if (cnf == net->ipv4.devconf_dflt)
2044 		return NETCONFA_IFINDEX_DEFAULT;
2045 	else if (cnf == net->ipv4.devconf_all)
2046 		return NETCONFA_IFINDEX_ALL;
2047 	else {
2048 		struct in_device *idev
2049 			= container_of(cnf, struct in_device, cnf);
2050 		return idev->dev->ifindex;
2051 	}
2052 }
2053 
/* Generic proc handler for devinet sysctls: runs proc_dointvec, then
 * on a write marks the option as locally set, propagates changes to
 * the default template, flushes the route cache for options that
 * affect input validation, and sends netconf notifications for the
 * options userspace tracks.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Recover the devconf array index from the data pointer. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2099 
/* proc handler for the forwarding sysctls.  Changes to anything but
 * the "default" entry need the RTNL lock; if it cannot be taken
 * without blocking, the written value is rolled back and the syscall
 * restarted.  "all" fans out via inet_forward_change(); a per-device
 * change disables LRO on that device and notifies netconf listeners.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2142 
/* proc handler for sysctls whose change invalidates cached routes:
 * plain proc_dointvec plus a route cache flush when the value changed.
 */
static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	struct net *net = ctl->extra2;

	if (write && *valp != val)
		rt_cache_flush(net);

	return ret;
}
2157 
/* Template for one per-device sysctl entry: @attr indexes the
 * ipv4_devconf.data array, @name is the proc file name, @mval the file
 * mode and @proc the handler.  .extra1 points at the devconf block so
 * the handler can recover the array index from ctl->data.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry using the generic devinet handler. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (mode 0444), still via the generic handler. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a custom proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Entry whose writes flush the route cache (ipv4_doint_and_flush). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2180 
/* Template sysctl table for net.ipv4.conf.<dev>.*.  It is kmemdup()'d per
 * device (and for the "all"/"default" pseudo devices) by
 * __devinet_sysctl_register(), which then rebases each entry's .data and
 * extra pointers onto the target ipv4_devconf.  The array is sized
 * __IPV4_DEVCONF_MAX so the last slot stays zeroed as the sentinel.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* "forwarding" needs its own handler: flipping it changes
		 * routing behaviour and must be announced via netconf.
		 */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* These additionally flush the route cache when changed. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2231 
2232 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2233 				     int ifindex, struct ipv4_devconf *p)
2234 {
2235 	int i;
2236 	struct devinet_sysctl_table *t;
2237 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2238 
2239 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2240 	if (!t)
2241 		goto out;
2242 
2243 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2244 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2245 		t->devinet_vars[i].extra1 = p;
2246 		t->devinet_vars[i].extra2 = net;
2247 	}
2248 
2249 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2250 
2251 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2252 	if (!t->sysctl_header)
2253 		goto free;
2254 
2255 	p->sysctl = t;
2256 
2257 	inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
2258 	return 0;
2259 
2260 free:
2261 	kfree(t);
2262 out:
2263 	return -ENOBUFS;
2264 }
2265 
2266 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2267 {
2268 	struct devinet_sysctl_table *t = cnf->sysctl;
2269 
2270 	if (!t)
2271 		return;
2272 
2273 	cnf->sysctl = NULL;
2274 	unregister_net_sysctl_table(t->sysctl_header);
2275 	kfree(t);
2276 }
2277 
2278 static int devinet_sysctl_register(struct in_device *idev)
2279 {
2280 	int err;
2281 
2282 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2283 		return -EINVAL;
2284 
2285 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2286 	if (err)
2287 		return err;
2288 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2289 					idev->dev->ifindex, &idev->cnf);
2290 	if (err)
2291 		neigh_sysctl_unregister(idev->arp_parms);
2292 	return err;
2293 }
2294 
/* Tear down the per-device sysctl state created by
 * devinet_sysctl_register(): the devconf subtree first, then the
 * neighbour (ARP) sysctls.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
	__devinet_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->arp_parms);
}
2300 
/* Template for the legacy top-level net.ipv4.ip_forward sysctl.  It
 * aliases the FORWARDING slot of the "all" devconf; devinet_init_net()
 * duplicates it for non-initial namespaces and rebases data/extra
 * pointers onto that namespace's devconf_all.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* sentinel */
};
2314 #endif
2315 
/* Per-namespace init: set up the "all" and "default" devconf blocks and
 * their sysctl subtrees, plus the legacy ip_forward sysctl.  The initial
 * namespace uses the static ipv4_devconf/ipv4_devconf_dflt objects and
 * the static ctl_forward_entry table; every other namespace gets
 * kmemdup()'d private copies.  Unwinds fully on failure.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-initial netns: clone the templates so each namespace
		 * has independent configuration state.
		 */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (!all)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (!dflt)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (!tbl)
			goto err_alloc_ctl;

		/* Rebase ip_forward onto this namespace's "all" devconf. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in reverse order; the pointer comparisons below keep the
	 * static init_net objects from being freed.
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2388 
/* Per-namespace teardown: unregister the sysctl subtrees created by
 * devinet_init_net() and free the namespace's devconf copies.
 * NOTE(review): kfree() on devconf_dflt/devconf_all appears to rely on
 * this exit hook never running for init_net (whose devconf objects are
 * static) — confirm against pernet_operations semantics.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Grab the table pointer before the header is destroyed. */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
	__devinet_sysctl_unregister(net->ipv4.devconf_all);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2403 
/* Per-network-namespace lifecycle hooks for IPv4 device configuration. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2408 
/* rtnetlink per-address-family ops: fill/validate/apply the AF_INET
 * portion (IFLA_AF_SPEC) of link messages.
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2416 
2417 void __init devinet_init(void)
2418 {
2419 	int i;
2420 
2421 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2422 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2423 
2424 	register_pernet_subsys(&devinet_ops);
2425 
2426 	register_gifconf(PF_INET, inet_gifconf);
2427 	register_netdevice_notifier(&ip_netdev_notifier);
2428 
2429 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2430 
2431 	rtnl_af_register(&inet_af_ops);
2432 
2433 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2434 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2435 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2436 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2437 		      inet_netconf_dump_devconf, NULL);
2438 }
2439