xref: /openbmc/linux/net/ipv4/devinet.c (revision d8bcaabe)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126 	ASSERT_RTNL();
127 	hlist_del_init_rcu(&ifa->hash);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU or RTNL.
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	u32 hash = inet_addr_hash(net, addr);
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 		if (ifa->ifa_local == addr) {
147 			struct net_device *dev = ifa->ifa_dev->dev;
148 
149 			if (!net_eq(dev_net(dev), net))
150 				continue;
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fall back to the FIB local table so that communication
161 		 * over loopback subnets works.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
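
/* Illustrative sketch, not part of the original file: a hypothetical caller
 * using devref=true, so the returned device carries a reference that must be
 * dropped with dev_put() once the caller is done with it.
 */
#if 0
static int example_addr_to_ifindex(struct net *net, __be32 addr)
{
	struct net_device *dev = __ip_dev_find(net, addr, true);
	int ifindex;

	if (!dev)
		return -ENODEV;

	ifindex = dev->ifindex;
	dev_put(dev);

	return ifindex;
}
#endif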
175 
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 			 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188 	return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194 
195 /* Locks all the inet devices. */
196 
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201 
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205 	if (ifa->ifa_dev)
206 		in_dev_put(ifa->ifa_dev);
207 	kfree(ifa);
208 }
209 
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214 
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217 	struct net_device *dev = idev->dev;
218 
219 	WARN_ON(idev->ifa_list);
220 	WARN_ON(idev->mc_list);
221 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225 	dev_put(dev);
226 	if (!idev->dead)
227 		pr_err("Freeing alive in_device %p\n", idev);
228 	else
229 		kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232 
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235 	struct in_device *in_dev;
236 	int err = -ENOMEM;
237 
238 	ASSERT_RTNL();
239 
240 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 	if (!in_dev)
242 		goto out;
243 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 			sizeof(in_dev->cnf));
245 	in_dev->cnf.sysctl = NULL;
246 	in_dev->dev = dev;
247 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 	if (!in_dev->arp_parms)
249 		goto out_kfree;
250 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 		dev_disable_lro(dev);
252 	/* Reference in_dev->dev */
253 	dev_hold(dev);
254 	/* Account for reference dev->ip_ptr (below) */
255 	refcount_set(&in_dev->refcnt, 1);
256 
257 	err = devinet_sysctl_register(in_dev);
258 	if (err) {
259 		in_dev->dead = 1;
260 		in_dev_put(in_dev);
261 		in_dev = NULL;
262 		goto out;
263 	}
264 	ip_mc_init_dev(in_dev);
265 	if (dev->flags & IFF_UP)
266 		ip_mc_up(in_dev);
267 
268 	/* we can receive as soon as ip_ptr is set -- do this last */
269 	rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271 	return in_dev ?: ERR_PTR(err);
272 out_kfree:
273 	kfree(in_dev);
274 	in_dev = NULL;
275 	goto out;
276 }
277 
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
281 	in_dev_put(idev);
282 }
283 
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286 	struct in_ifaddr *ifa;
287 	struct net_device *dev;
288 
289 	ASSERT_RTNL();
290 
291 	dev = in_dev->dev;
292 
293 	in_dev->dead = 1;
294 
295 	ip_mc_destroy_dev(in_dev);
296 
297 	while ((ifa = in_dev->ifa_list) != NULL) {
298 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299 		inet_free_ifa(ifa);
300 	}
301 
302 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
303 
304 	devinet_sysctl_unregister(in_dev);
305 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306 	arp_ifdown(dev);
307 
308 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310 
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313 	rcu_read_lock();
314 	for_primary_ifa(in_dev) {
315 		if (inet_ifa_match(a, ifa)) {
316 			if (!b || inet_ifa_match(b, ifa)) {
317 				rcu_read_unlock();
318 				return 1;
319 			}
320 		}
321 	} endfor_ifa(in_dev);
322 	rcu_read_unlock();
323 	return 0;
324 }
325 
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327 			 int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329 	struct in_ifaddr *promote = NULL;
330 	struct in_ifaddr *ifa, *ifa1 = *ifap;
331 	struct in_ifaddr *last_prim = in_dev->ifa_list;
332 	struct in_ifaddr *prev_prom = NULL;
333 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334 
335 	ASSERT_RTNL();
336 
337 	if (in_dev->dead)
338 		goto no_promotions;
339 
340 	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
341 	 * unless alias promotion is set.
342 	 */
343 
344 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
345 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
346 
347 		while ((ifa = *ifap1) != NULL) {
348 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
349 			    ifa1->ifa_scope <= ifa->ifa_scope)
350 				last_prim = ifa;
351 
352 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
353 			    ifa1->ifa_mask != ifa->ifa_mask ||
354 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
355 				ifap1 = &ifa->ifa_next;
356 				prev_prom = ifa;
357 				continue;
358 			}
359 
360 			if (!do_promote) {
361 				inet_hash_remove(ifa);
362 				*ifap1 = ifa->ifa_next;
363 
364 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
365 				blocking_notifier_call_chain(&inetaddr_chain,
366 						NETDEV_DOWN, ifa);
367 				inet_free_ifa(ifa);
368 			} else {
369 				promote = ifa;
370 				break;
371 			}
372 		}
373 	}
374 
375 	/* On promotion all secondaries from the subnet change
376 	 * their primary IP; we must remove all their routes silently
377 	 * and later add them back with the new prefsrc. Do this
378 	 * while all addresses are still on the device list.
379 	 */
380 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
381 		if (ifa1->ifa_mask == ifa->ifa_mask &&
382 		    inet_ifa_match(ifa1->ifa_address, ifa))
383 			fib_del_ifaddr(ifa, ifa1);
384 	}
385 
386 no_promotions:
387 	/* 2. Unlink it */
388 
389 	*ifap = ifa1->ifa_next;
390 	inet_hash_remove(ifa1);
391 
392 	/* 3. Announce address deletion */
393 
394 	/* Send the message first, then call the notifier.
395 	   At first sight, the FIB update triggered by the notifier
396 	   will refer to an already deleted ifaddr, which could confuse
397 	   netlink listeners. In practice it does not: gated sees
398 	   the route deleted and, if it still thinks the ifaddr
399 	   is valid, will try to restore the deleted routes... Grr.
400 	   So this order is correct.
401 	 */
402 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
403 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
404 
405 	if (promote) {
406 		struct in_ifaddr *next_sec = promote->ifa_next;
407 
408 		if (prev_prom) {
409 			prev_prom->ifa_next = promote->ifa_next;
410 			promote->ifa_next = last_prim->ifa_next;
411 			last_prim->ifa_next = promote;
412 		}
413 
414 		promote->ifa_flags &= ~IFA_F_SECONDARY;
415 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
416 		blocking_notifier_call_chain(&inetaddr_chain,
417 				NETDEV_UP, promote);
418 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
419 			if (ifa1->ifa_mask != ifa->ifa_mask ||
420 			    !inet_ifa_match(ifa1->ifa_address, ifa))
421 					continue;
422 			fib_add_ifaddr(ifa);
423 		}
424 
425 	}
426 	if (destroy)
427 		inet_free_ifa(ifa1);
428 }
429 
430 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
431 			 int destroy)
432 {
433 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
434 }
435 
436 static void check_lifetime(struct work_struct *work);
437 
438 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439 
440 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441 			     u32 portid)
442 {
443 	struct in_device *in_dev = ifa->ifa_dev;
444 	struct in_ifaddr *ifa1, **ifap, **last_primary;
445 	struct in_validator_info ivi;
446 	int ret;
447 
448 	ASSERT_RTNL();
449 
450 	if (!ifa->ifa_local) {
451 		inet_free_ifa(ifa);
452 		return 0;
453 	}
454 
455 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
456 	last_primary = &in_dev->ifa_list;
457 
458 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
459 	     ifap = &ifa1->ifa_next) {
460 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
461 		    ifa->ifa_scope <= ifa1->ifa_scope)
462 			last_primary = &ifa1->ifa_next;
463 		if (ifa1->ifa_mask == ifa->ifa_mask &&
464 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
465 			if (ifa1->ifa_local == ifa->ifa_local) {
466 				inet_free_ifa(ifa);
467 				return -EEXIST;
468 			}
469 			if (ifa1->ifa_scope != ifa->ifa_scope) {
470 				inet_free_ifa(ifa);
471 				return -EINVAL;
472 			}
473 			ifa->ifa_flags |= IFA_F_SECONDARY;
474 		}
475 	}
476 
477 	/* Allow any devices that wish to register ifaddr validators to weigh
478 	 * in now, before changes are committed.  The rtnl lock is serializing
479 	 * access here, so the state should not change between a validator call
480 	 * and a final notify on commit.  This isn't invoked on promotion under
481 	 * the assumption that validators are checking the address itself, and
482 	 * not the flags.
483 	 */
484 	ivi.ivi_addr = ifa->ifa_address;
485 	ivi.ivi_dev = ifa->ifa_dev;
486 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
487 					   NETDEV_UP, &ivi);
488 	ret = notifier_to_errno(ret);
489 	if (ret) {
490 		inet_free_ifa(ifa);
491 		return ret;
492 	}
493 
494 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
495 		prandom_seed((__force u32) ifa->ifa_local);
496 		ifap = last_primary;
497 	}
498 
499 	ifa->ifa_next = *ifap;
500 	*ifap = ifa;
501 
502 	inet_hash_insert(dev_net(in_dev->dev), ifa);
503 
504 	cancel_delayed_work(&check_lifetime_work);
505 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
506 
507 	/* Send the message first, then call the notifier.
508 	   The notifier will trigger a FIB update, so that
509 	   netlink listeners will know about the new ifaddr. */
510 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
511 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
512 
513 	return 0;
514 }
515 
516 static int inet_insert_ifa(struct in_ifaddr *ifa)
517 {
518 	return __inet_insert_ifa(ifa, NULL, 0);
519 }
520 
521 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
522 {
523 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
524 
525 	ASSERT_RTNL();
526 
527 	if (!in_dev) {
528 		inet_free_ifa(ifa);
529 		return -ENOBUFS;
530 	}
531 	ipv4_devconf_setall(in_dev);
532 	neigh_parms_data_state_setall(in_dev->arp_parms);
533 	if (ifa->ifa_dev != in_dev) {
534 		WARN_ON(ifa->ifa_dev);
535 		in_dev_hold(in_dev);
536 		ifa->ifa_dev = in_dev;
537 	}
538 	if (ipv4_is_loopback(ifa->ifa_local))
539 		ifa->ifa_scope = RT_SCOPE_HOST;
540 	return inet_insert_ifa(ifa);
541 }
542 
543 /* Caller must hold RCU or RTNL:
544  * we don't take a reference on the found in_device.
545  */
546 struct in_device *inetdev_by_index(struct net *net, int ifindex)
547 {
548 	struct net_device *dev;
549 	struct in_device *in_dev = NULL;
550 
551 	rcu_read_lock();
552 	dev = dev_get_by_index_rcu(net, ifindex);
553 	if (dev)
554 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
555 	rcu_read_unlock();
556 	return in_dev;
557 }
558 EXPORT_SYMBOL(inetdev_by_index);
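
/* Illustrative sketch, not part of the original file: a hypothetical helper
 * following the rule above -- hold rcu_read_lock() across the call, since no
 * reference is taken on the returned in_device.
 */
#if 0
static bool example_ifindex_has_inet(struct net *net, int ifindex)
{
	bool ret;

	rcu_read_lock();
	ret = inetdev_by_index(net, ifindex) != NULL;
	rcu_read_unlock();

	return ret;
}
#endif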
559 
560 /* Called only from RTNL-locked context. No other locks are taken. */
561 
562 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
563 				    __be32 mask)
564 {
565 	ASSERT_RTNL();
566 
567 	for_primary_ifa(in_dev) {
568 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
569 			return ifa;
570 	} endfor_ifa(in_dev);
571 	return NULL;
572 }
573 
574 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
575 {
576 	struct ip_mreqn mreq = {
577 		.imr_multiaddr.s_addr = ifa->ifa_address,
578 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
579 	};
580 	int ret;
581 
582 	ASSERT_RTNL();
583 
584 	lock_sock(sk);
585 	if (join)
586 		ret = ip_mc_join_group(sk, &mreq);
587 	else
588 		ret = ip_mc_leave_group(sk, &mreq);
589 	release_sock(sk);
590 
591 	return ret;
592 }
593 
594 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
595 			    struct netlink_ext_ack *extack)
596 {
597 	struct net *net = sock_net(skb->sk);
598 	struct nlattr *tb[IFA_MAX+1];
599 	struct in_device *in_dev;
600 	struct ifaddrmsg *ifm;
601 	struct in_ifaddr *ifa, **ifap;
602 	int err = -EINVAL;
603 
604 	ASSERT_RTNL();
605 
606 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
607 			  extack);
608 	if (err < 0)
609 		goto errout;
610 
611 	ifm = nlmsg_data(nlh);
612 	in_dev = inetdev_by_index(net, ifm->ifa_index);
613 	if (!in_dev) {
614 		err = -ENODEV;
615 		goto errout;
616 	}
617 
618 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
619 	     ifap = &ifa->ifa_next) {
620 		if (tb[IFA_LOCAL] &&
621 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
622 			continue;
623 
624 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
625 			continue;
626 
627 		if (tb[IFA_ADDRESS] &&
628 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
629 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
630 			continue;
631 
632 		if (ipv4_is_multicast(ifa->ifa_address))
633 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
634 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
635 		return 0;
636 	}
637 
638 	err = -EADDRNOTAVAIL;
639 errout:
640 	return err;
641 }
642 
643 #define INFINITY_LIFE_TIME	0xFFFFFFFF
644 
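/* Address lifetimes are checked in two passes per hash bucket: a lockless
 * RCU walk first decides whether anything needs to change, and only then is
 * the RTNL lock taken for the walk that actually deletes expired addresses
 * or marks them IFA_F_DEPRECATED.
 */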
645 static void check_lifetime(struct work_struct *work)
646 {
647 	unsigned long now, next, next_sec, next_sched;
648 	struct in_ifaddr *ifa;
649 	struct hlist_node *n;
650 	int i;
651 
652 	now = jiffies;
653 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
654 
655 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
656 		bool change_needed = false;
657 
658 		rcu_read_lock();
659 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
660 			unsigned long age;
661 
662 			if (ifa->ifa_flags & IFA_F_PERMANENT)
663 				continue;
664 
665 			/* We try to batch several events at once. */
666 			age = (now - ifa->ifa_tstamp +
667 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
668 
669 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
670 			    age >= ifa->ifa_valid_lft) {
671 				change_needed = true;
672 			} else if (ifa->ifa_preferred_lft ==
673 				   INFINITY_LIFE_TIME) {
674 				continue;
675 			} else if (age >= ifa->ifa_preferred_lft) {
676 				if (time_before(ifa->ifa_tstamp +
677 						ifa->ifa_valid_lft * HZ, next))
678 					next = ifa->ifa_tstamp +
679 					       ifa->ifa_valid_lft * HZ;
680 
681 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
682 					change_needed = true;
683 			} else if (time_before(ifa->ifa_tstamp +
684 					       ifa->ifa_preferred_lft * HZ,
685 					       next)) {
686 				next = ifa->ifa_tstamp +
687 				       ifa->ifa_preferred_lft * HZ;
688 			}
689 		}
690 		rcu_read_unlock();
691 		if (!change_needed)
692 			continue;
693 		rtnl_lock();
694 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
695 			unsigned long age;
696 
697 			if (ifa->ifa_flags & IFA_F_PERMANENT)
698 				continue;
699 
700 			/* We try to batch several events at once. */
701 			age = (now - ifa->ifa_tstamp +
702 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
703 
704 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
705 			    age >= ifa->ifa_valid_lft) {
706 				struct in_ifaddr **ifap;
707 
708 				for (ifap = &ifa->ifa_dev->ifa_list;
709 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
710 					if (*ifap == ifa) {
711 						inet_del_ifa(ifa->ifa_dev,
712 							     ifap, 1);
713 						break;
714 					}
715 				}
716 			} else if (ifa->ifa_preferred_lft !=
717 				   INFINITY_LIFE_TIME &&
718 				   age >= ifa->ifa_preferred_lft &&
719 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
720 				ifa->ifa_flags |= IFA_F_DEPRECATED;
721 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
722 			}
723 		}
724 		rtnl_unlock();
725 	}
726 
727 	next_sec = round_jiffies_up(next);
728 	next_sched = next;
729 
730 	/* If rounded timeout is accurate enough, accept it. */
731 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
732 		next_sched = next_sec;
733 
734 	now = jiffies;
735 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
736 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
737 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
738 
739 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
740 			next_sched - now);
741 }
742 
743 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
744 			     __u32 prefered_lft)
745 {
746 	unsigned long timeout;
747 
748 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
749 
750 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
751 	if (addrconf_finite_timeout(timeout))
752 		ifa->ifa_valid_lft = timeout;
753 	else
754 		ifa->ifa_flags |= IFA_F_PERMANENT;
755 
756 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
757 	if (addrconf_finite_timeout(timeout)) {
758 		if (timeout == 0)
759 			ifa->ifa_flags |= IFA_F_DEPRECATED;
760 		ifa->ifa_preferred_lft = timeout;
761 	}
762 	ifa->ifa_tstamp = jiffies;
763 	if (!ifa->ifa_cstamp)
764 		ifa->ifa_cstamp = ifa->ifa_tstamp;
765 }
766 
767 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
768 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
769 {
770 	struct nlattr *tb[IFA_MAX+1];
771 	struct in_ifaddr *ifa;
772 	struct ifaddrmsg *ifm;
773 	struct net_device *dev;
774 	struct in_device *in_dev;
775 	int err;
776 
777 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
778 			  NULL);
779 	if (err < 0)
780 		goto errout;
781 
782 	ifm = nlmsg_data(nlh);
783 	err = -EINVAL;
784 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
785 		goto errout;
786 
787 	dev = __dev_get_by_index(net, ifm->ifa_index);
788 	err = -ENODEV;
789 	if (!dev)
790 		goto errout;
791 
792 	in_dev = __in_dev_get_rtnl(dev);
793 	err = -ENOBUFS;
794 	if (!in_dev)
795 		goto errout;
796 
797 	ifa = inet_alloc_ifa();
798 	if (!ifa)
799 		/*
800 		 * A potential in_dev allocation can be left alive; it stays
801 		 * assigned to its device and is destroyed with it.
802 		 */
803 		goto errout;
804 
805 	ipv4_devconf_setall(in_dev);
806 	neigh_parms_data_state_setall(in_dev->arp_parms);
807 	in_dev_hold(in_dev);
808 
809 	if (!tb[IFA_ADDRESS])
810 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
811 
812 	INIT_HLIST_NODE(&ifa->hash);
813 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
814 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
815 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
816 					 ifm->ifa_flags;
817 	ifa->ifa_scope = ifm->ifa_scope;
818 	ifa->ifa_dev = in_dev;
819 
820 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
821 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
822 
823 	if (tb[IFA_BROADCAST])
824 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
825 
826 	if (tb[IFA_LABEL])
827 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
828 	else
829 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
830 
831 	if (tb[IFA_CACHEINFO]) {
832 		struct ifa_cacheinfo *ci;
833 
834 		ci = nla_data(tb[IFA_CACHEINFO]);
835 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
836 			err = -EINVAL;
837 			goto errout_free;
838 		}
839 		*pvalid_lft = ci->ifa_valid;
840 		*pprefered_lft = ci->ifa_prefered;
841 	}
842 
843 	return ifa;
844 
845 errout_free:
846 	inet_free_ifa(ifa);
847 errout:
848 	return ERR_PTR(err);
849 }
850 
851 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
852 {
853 	struct in_device *in_dev = ifa->ifa_dev;
854 	struct in_ifaddr *ifa1, **ifap;
855 
856 	if (!ifa->ifa_local)
857 		return NULL;
858 
859 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
860 	     ifap = &ifa1->ifa_next) {
861 		if (ifa1->ifa_mask == ifa->ifa_mask &&
862 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
863 		    ifa1->ifa_local == ifa->ifa_local)
864 			return ifa1;
865 	}
866 	return NULL;
867 }
868 
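/* RTM_NEWADDR handler: for example, a request such as
 * "ip addr add 192.0.2.1/24 dev eth0" arrives as an RTM_NEWADDR message
 * carrying IFA_LOCAL and a prefix length, which rtm_to_ifaddr() above turns
 * into a struct in_ifaddr before it is inserted or updated below.
 */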
869 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
870 			    struct netlink_ext_ack *extack)
871 {
872 	struct net *net = sock_net(skb->sk);
873 	struct in_ifaddr *ifa;
874 	struct in_ifaddr *ifa_existing;
875 	__u32 valid_lft = INFINITY_LIFE_TIME;
876 	__u32 prefered_lft = INFINITY_LIFE_TIME;
877 
878 	ASSERT_RTNL();
879 
880 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
881 	if (IS_ERR(ifa))
882 		return PTR_ERR(ifa);
883 
884 	ifa_existing = find_matching_ifa(ifa);
885 	if (!ifa_existing) {
886 		/* It would be best to check for !NLM_F_CREATE here but
887 		 * userspace already relies on not having to provide this.
888 		 */
889 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
890 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
891 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
892 					       true, ifa);
893 
894 			if (ret < 0) {
895 				inet_free_ifa(ifa);
896 				return ret;
897 			}
898 		}
899 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
900 	} else {
901 		inet_free_ifa(ifa);
902 
903 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
904 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
905 			return -EEXIST;
906 		ifa = ifa_existing;
907 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
908 		cancel_delayed_work(&check_lifetime_work);
909 		queue_delayed_work(system_power_efficient_wq,
910 				&check_lifetime_work, 0);
911 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
912 	}
913 	return 0;
914 }
915 
916 /*
917  *	Determine a default network mask based on the (classful) IP address.
918  */
919 
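/* For example: 10.0.0.1 (class A) yields 8, 172.16.0.1 (class B) yields 16,
 * 192.0.2.1 (class C) yields 24, 0.0.0.0 yields 0, and a multicast address
 * such as 224.0.0.1 yields -1.
 */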
920 static int inet_abc_len(__be32 addr)
921 {
922 	int rc = -1;	/* Something else, probably a multicast. */
923 
924 	if (ipv4_is_zeronet(addr))
925 		rc = 0;
926 	else {
927 		__u32 haddr = ntohl(addr);
928 
929 		if (IN_CLASSA(haddr))
930 			rc = 8;
931 		else if (IN_CLASSB(haddr))
932 			rc = 16;
933 		else if (IN_CLASSC(haddr))
934 			rc = 24;
935 	}
936 
937 	return rc;
938 }
939 
940 
941 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
942 {
943 	struct ifreq ifr;
944 	struct sockaddr_in sin_orig;
945 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
946 	struct in_device *in_dev;
947 	struct in_ifaddr **ifap = NULL;
948 	struct in_ifaddr *ifa = NULL;
949 	struct net_device *dev;
950 	char *colon;
951 	int ret = -EFAULT;
952 	int tryaddrmatch = 0;
953 
954 	/*
955 	 *	Fetch the caller's info block into kernel space
956 	 */
957 
958 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
959 		goto out;
960 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
961 
962 	/* save original address for comparison */
963 	memcpy(&sin_orig, sin, sizeof(*sin));
964 
965 	colon = strchr(ifr.ifr_name, ':');
966 	if (colon)
967 		*colon = 0;
968 
969 	dev_load(net, ifr.ifr_name);
970 
971 	switch (cmd) {
972 	case SIOCGIFADDR:	/* Get interface address */
973 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
974 	case SIOCGIFDSTADDR:	/* Get the destination address */
975 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
976 		/* Note that these ioctls will not sleep,
977 		   so we do not take a lock.
978 		   One day we will be forced to put a shared lock here (for SMP).
979 		 */
980 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
981 		memset(sin, 0, sizeof(*sin));
982 		sin->sin_family = AF_INET;
983 		break;
984 
985 	case SIOCSIFFLAGS:
986 		ret = -EPERM;
987 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
988 			goto out;
989 		break;
990 	case SIOCSIFADDR:	/* Set interface address (and family) */
991 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
992 	case SIOCSIFDSTADDR:	/* Set the destination address */
993 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
994 		ret = -EPERM;
995 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
996 			goto out;
997 		ret = -EINVAL;
998 		if (sin->sin_family != AF_INET)
999 			goto out;
1000 		break;
1001 	default:
1002 		ret = -EINVAL;
1003 		goto out;
1004 	}
1005 
1006 	rtnl_lock();
1007 
1008 	ret = -ENODEV;
1009 	dev = __dev_get_by_name(net, ifr.ifr_name);
1010 	if (!dev)
1011 		goto done;
1012 
1013 	if (colon)
1014 		*colon = ':';
1015 
1016 	in_dev = __in_dev_get_rtnl(dev);
1017 	if (in_dev) {
1018 		if (tryaddrmatch) {
1019 			/* Matthias Andree */
1020 			/* compare label and address (4.4BSD style) */
1021 			/* note: we only do this for a limited set of ioctls
1022 			   and only if the original address family was AF_INET.
1023 			   This is checked above. */
1024 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1025 			     ifap = &ifa->ifa_next) {
1026 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1027 				    sin_orig.sin_addr.s_addr ==
1028 							ifa->ifa_local) {
1029 					break; /* found */
1030 				}
1031 			}
1032 		}
1033 		/* we didn't get a match; maybe the application is
1034 		   4.3BSD-style and passed in junk, so we fall back to
1035 		   comparing just the label */
1036 		if (!ifa) {
1037 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038 			     ifap = &ifa->ifa_next)
1039 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1040 					break;
1041 		}
1042 	}
1043 
1044 	ret = -EADDRNOTAVAIL;
1045 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1046 		goto done;
1047 
1048 	switch (cmd) {
1049 	case SIOCGIFADDR:	/* Get interface address */
1050 		sin->sin_addr.s_addr = ifa->ifa_local;
1051 		goto rarok;
1052 
1053 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1054 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1055 		goto rarok;
1056 
1057 	case SIOCGIFDSTADDR:	/* Get the destination address */
1058 		sin->sin_addr.s_addr = ifa->ifa_address;
1059 		goto rarok;
1060 
1061 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1062 		sin->sin_addr.s_addr = ifa->ifa_mask;
1063 		goto rarok;
1064 
1065 	case SIOCSIFFLAGS:
1066 		if (colon) {
1067 			ret = -EADDRNOTAVAIL;
1068 			if (!ifa)
1069 				break;
1070 			ret = 0;
1071 			if (!(ifr.ifr_flags & IFF_UP))
1072 				inet_del_ifa(in_dev, ifap, 1);
1073 			break;
1074 		}
1075 		ret = dev_change_flags(dev, ifr.ifr_flags);
1076 		break;
1077 
1078 	case SIOCSIFADDR:	/* Set interface address (and family) */
1079 		ret = -EINVAL;
1080 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1081 			break;
1082 
1083 		if (!ifa) {
1084 			ret = -ENOBUFS;
1085 			ifa = inet_alloc_ifa();
1086 			if (!ifa)
1087 				break;
1088 			INIT_HLIST_NODE(&ifa->hash);
1089 			if (colon)
1090 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1091 			else
1092 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1093 		} else {
1094 			ret = 0;
1095 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1096 				break;
1097 			inet_del_ifa(in_dev, ifap, 0);
1098 			ifa->ifa_broadcast = 0;
1099 			ifa->ifa_scope = 0;
1100 		}
1101 
1102 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1103 
1104 		if (!(dev->flags & IFF_POINTOPOINT)) {
1105 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1106 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1107 			if ((dev->flags & IFF_BROADCAST) &&
1108 			    ifa->ifa_prefixlen < 31)
1109 				ifa->ifa_broadcast = ifa->ifa_address |
1110 						     ~ifa->ifa_mask;
1111 		} else {
1112 			ifa->ifa_prefixlen = 32;
1113 			ifa->ifa_mask = inet_make_mask(32);
1114 		}
1115 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1116 		ret = inet_set_ifa(dev, ifa);
1117 		break;
1118 
1119 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1120 		ret = 0;
1121 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1122 			inet_del_ifa(in_dev, ifap, 0);
1123 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1124 			inet_insert_ifa(ifa);
1125 		}
1126 		break;
1127 
1128 	case SIOCSIFDSTADDR:	/* Set the destination address */
1129 		ret = 0;
1130 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1131 			break;
1132 		ret = -EINVAL;
1133 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1134 			break;
1135 		ret = 0;
1136 		inet_del_ifa(in_dev, ifap, 0);
1137 		ifa->ifa_address = sin->sin_addr.s_addr;
1138 		inet_insert_ifa(ifa);
1139 		break;
1140 
1141 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1142 
1143 		/*
1144 		 *	The mask we set must be legal.
1145 		 */
1146 		ret = -EINVAL;
1147 		if (bad_mask(sin->sin_addr.s_addr, 0))
1148 			break;
1149 		ret = 0;
1150 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1151 			__be32 old_mask = ifa->ifa_mask;
1152 			inet_del_ifa(in_dev, ifap, 0);
1153 			ifa->ifa_mask = sin->sin_addr.s_addr;
1154 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1155 
1156 			/* If the current broadcast address was derived
1157 			 * from the old netmask, recalculate it with the
1158 			 * new netmask. Otherwise it's a funny address,
1159 			 * so don't touch it since
1160 			 * the user seems to know what (s)he's doing...
1161 			 */
1162 			if ((dev->flags & IFF_BROADCAST) &&
1163 			    (ifa->ifa_prefixlen < 31) &&
1164 			    (ifa->ifa_broadcast ==
1165 			     (ifa->ifa_local|~old_mask))) {
1166 				ifa->ifa_broadcast = (ifa->ifa_local |
1167 						      ~sin->sin_addr.s_addr);
1168 			}
1169 			inet_insert_ifa(ifa);
1170 		}
1171 		break;
1172 	}
1173 done:
1174 	rtnl_unlock();
1175 out:
1176 	return ret;
1177 rarok:
1178 	rtnl_unlock();
1179 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1180 	goto out;
1181 }
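
/* Illustrative userspace counterpart, not part of the original file, of the
 * SIOCGIFADDR branch above; it reads an interface's primary IPv4 address
 * (the interface name "eth0" is just an example):
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (fd >= 0 && ioctl(fd, SIOCGIFADDR, &ifr) == 0) {
 *		struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 *		printf("%s\n", inet_ntoa(sin->sin_addr));
 *	}
 */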
1182 
1183 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1184 {
1185 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1186 	struct in_ifaddr *ifa;
1187 	struct ifreq ifr;
1188 	int done = 0;
1189 
1190 	if (!in_dev)
1191 		goto out;
1192 
1193 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1194 		if (!buf) {
1195 			done += sizeof(ifr);
1196 			continue;
1197 		}
1198 		if (len < (int) sizeof(ifr))
1199 			break;
1200 		memset(&ifr, 0, sizeof(struct ifreq));
1201 		strcpy(ifr.ifr_name, ifa->ifa_label);
1202 
1203 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1204 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1205 								ifa->ifa_local;
1206 
1207 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1208 			done = -EFAULT;
1209 			break;
1210 		}
1211 		buf  += sizeof(struct ifreq);
1212 		len  -= sizeof(struct ifreq);
1213 		done += sizeof(struct ifreq);
1214 	}
1215 out:
1216 	return done;
1217 }
1218 
1219 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1220 				 int scope)
1221 {
1222 	for_primary_ifa(in_dev) {
1223 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1224 		    ifa->ifa_scope <= scope)
1225 			return ifa->ifa_local;
1226 	} endfor_ifa(in_dev);
1227 
1228 	return 0;
1229 }
1230 
1231 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1232 {
1233 	__be32 addr = 0;
1234 	struct in_device *in_dev;
1235 	struct net *net = dev_net(dev);
1236 	int master_idx;
1237 
1238 	rcu_read_lock();
1239 	in_dev = __in_dev_get_rcu(dev);
1240 	if (!in_dev)
1241 		goto no_in_dev;
1242 
1243 	for_primary_ifa(in_dev) {
1244 		if (ifa->ifa_scope > scope)
1245 			continue;
1246 		if (!dst || inet_ifa_match(dst, ifa)) {
1247 			addr = ifa->ifa_local;
1248 			break;
1249 		}
1250 		if (!addr)
1251 			addr = ifa->ifa_local;
1252 	} endfor_ifa(in_dev);
1253 
1254 	if (addr)
1255 		goto out_unlock;
1256 no_in_dev:
1257 	master_idx = l3mdev_master_ifindex_rcu(dev);
1258 
1259 	/* For VRFs, the VRF device takes the place of the loopback device,
1260 	 * with addresses on it being preferred.  Note in such cases the
1261 	 * loopback device will be among the devices that fail the master_idx
1262 	 * equality check in the loop below.
1263 	 */
1264 	if (master_idx &&
1265 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1266 	    (in_dev = __in_dev_get_rcu(dev))) {
1267 		addr = in_dev_select_addr(in_dev, scope);
1268 		if (addr)
1269 			goto out_unlock;
1270 	}
1271 
1272 	/* Non-loopback addresses on the loopback device should be preferred
1273 	   in this case. It is important that lo is the first interface
1274 	   in the dev_base list.
1275 	 */
1276 	for_each_netdev_rcu(net, dev) {
1277 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1278 			continue;
1279 
1280 		in_dev = __in_dev_get_rcu(dev);
1281 		if (!in_dev)
1282 			continue;
1283 
1284 		addr = in_dev_select_addr(in_dev, scope);
1285 		if (addr)
1286 			goto out_unlock;
1287 	}
1288 out_unlock:
1289 	rcu_read_unlock();
1290 	return addr;
1291 }
1292 EXPORT_SYMBOL(inet_select_addr);
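
/* Illustrative sketch, not part of the original file: a hypothetical caller
 * asking for a universe-scoped source address on @dev for destination @dst;
 * inet_select_addr() returns 0 when no suitable address is configured.
 */
#if 0
static __be32 example_pick_saddr(const struct net_device *dev, __be32 dst)
{
	return inet_select_addr(dev, dst, RT_SCOPE_UNIVERSE);
}
#endif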
1293 
1294 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1295 			      __be32 local, int scope)
1296 {
1297 	int same = 0;
1298 	__be32 addr = 0;
1299 
1300 	for_ifa(in_dev) {
1301 		if (!addr &&
1302 		    (local == ifa->ifa_local || !local) &&
1303 		    ifa->ifa_scope <= scope) {
1304 			addr = ifa->ifa_local;
1305 			if (same)
1306 				break;
1307 		}
1308 		if (!same) {
1309 			same = (!local || inet_ifa_match(local, ifa)) &&
1310 				(!dst || inet_ifa_match(dst, ifa));
1311 			if (same && addr) {
1312 				if (local || !dst)
1313 					break;
1314 				/* Is the selected addr in the dst subnet? */
1315 				if (inet_ifa_match(addr, ifa))
1316 					break;
1317 				/* No; then can we use the new local src? */
1318 				if (ifa->ifa_scope <= scope) {
1319 					addr = ifa->ifa_local;
1320 					break;
1321 				}
1322 				/* keep searching for a dst subnet that covers addr */
1323 				same = 0;
1324 			}
1325 		}
1326 	} endfor_ifa(in_dev);
1327 
1328 	return same ? addr : 0;
1329 }
1330 
1331 /*
1332  * Confirm that local IP address exists using wildcards:
1333  * - net: netns to check, cannot be NULL
1334  * - in_dev: only on this interface, NULL=any interface
1335  * - dst: only in the same subnet as dst, 0=any dst
1336  * - local: address, 0=autoselect the local address
1337  * - scope: maximum allowed scope value for the local address
1338  */
1339 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1340 			 __be32 dst, __be32 local, int scope)
1341 {
1342 	__be32 addr = 0;
1343 	struct net_device *dev;
1344 
1345 	if (in_dev)
1346 		return confirm_addr_indev(in_dev, dst, local, scope);
1347 
1348 	rcu_read_lock();
1349 	for_each_netdev_rcu(net, dev) {
1350 		in_dev = __in_dev_get_rcu(dev);
1351 		if (in_dev) {
1352 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1353 			if (addr)
1354 				break;
1355 		}
1356 	}
1357 	rcu_read_unlock();
1358 
1359 	return addr;
1360 }
1361 EXPORT_SYMBOL(inet_confirm_addr);
1362 
1363 /*
1364  *	Device notifier
1365  */
1366 
1367 int register_inetaddr_notifier(struct notifier_block *nb)
1368 {
1369 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1370 }
1371 EXPORT_SYMBOL(register_inetaddr_notifier);
1372 
1373 int unregister_inetaddr_notifier(struct notifier_block *nb)
1374 {
1375 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1376 }
1377 EXPORT_SYMBOL(unregister_inetaddr_notifier);
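
/* Illustrative sketch, not part of the original file: a hypothetical listener
 * on the inetaddr chain.  The chain passes the affected struct in_ifaddr as
 * the notifier data, with NETDEV_UP on address addition and NETDEV_DOWN on
 * deletion (see __inet_insert_ifa() and __inet_del_ifa() above).
 */
#if 0
static int example_inetaddr_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = ptr;

	if (event == NETDEV_UP)
		pr_debug("%pI4 added on %s\n",
			 &ifa->ifa_local, ifa->ifa_dev->dev->name);

	return NOTIFY_DONE;
}

static struct notifier_block example_inetaddr_nb = {
	.notifier_call = example_inetaddr_event,
};

/* registered with register_inetaddr_notifier(&example_inetaddr_nb); */
#endif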
1378 
1379 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1380 {
1381 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1382 }
1383 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1384 
1385 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1386 {
1387 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1388 	    nb);
1389 }
1390 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
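
/* Illustrative sketch, not part of the original file: a hypothetical validator
 * on the chain above.  It receives a struct in_validator_info and can veto an
 * address by returning a negative errno wrapped with notifier_from_errno(),
 * which makes __inet_insert_ifa() fail the addition (the loopback check here
 * is only an example policy).
 */
#if 0
static int example_inetaddr_validate(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct in_validator_info *ivi = ptr;

	if (event == NETDEV_UP && ipv4_is_loopback(ivi->ivi_addr))
		return notifier_from_errno(-EADDRNOTAVAIL);

	return NOTIFY_DONE;
}
#endif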
1391 
1392 /* Rename ifa_labels for a device name change. Make some effort to preserve
1393  * existing alias numbering and to create unique labels if possible.
1394  */
1395 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1396 {
1397 	struct in_ifaddr *ifa;
1398 	int named = 0;
1399 
1400 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1401 		char old[IFNAMSIZ], *dot;
1402 
1403 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1404 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1405 		if (named++ == 0)
1406 			goto skip;
1407 		dot = strchr(old, ':');
1408 		if (!dot) {
1409 			sprintf(old, ":%d", named);
1410 			dot = old;
1411 		}
1412 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1413 			strcat(ifa->ifa_label, dot);
1414 		else
1415 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1416 skip:
1417 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1418 	}
1419 }
1420 
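/* 68 is the minimum IPv4 MTU: a maximum-length (60 byte) IP header plus an
 * 8 byte fragment, per RFC 791.
 */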
1421 static bool inetdev_valid_mtu(unsigned int mtu)
1422 {
1423 	return mtu >= 68;
1424 }
1425 
1426 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1427 					struct in_device *in_dev)
1428 
1429 {
1430 	struct in_ifaddr *ifa;
1431 
1432 	for (ifa = in_dev->ifa_list; ifa;
1433 	     ifa = ifa->ifa_next) {
1434 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1435 			 ifa->ifa_local, dev,
1436 			 ifa->ifa_local, NULL,
1437 			 dev->dev_addr, NULL);
1438 	}
1439 }
1440 
1441 /* Called only under the RTNL lock */
1442 
1443 static int inetdev_event(struct notifier_block *this, unsigned long event,
1444 			 void *ptr)
1445 {
1446 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1447 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1448 
1449 	ASSERT_RTNL();
1450 
1451 	if (!in_dev) {
1452 		if (event == NETDEV_REGISTER) {
1453 			in_dev = inetdev_init(dev);
1454 			if (IS_ERR(in_dev))
1455 				return notifier_from_errno(PTR_ERR(in_dev));
1456 			if (dev->flags & IFF_LOOPBACK) {
1457 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1458 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1459 			}
1460 		} else if (event == NETDEV_CHANGEMTU) {
1461 			/* Re-enabling IP */
1462 			if (inetdev_valid_mtu(dev->mtu))
1463 				in_dev = inetdev_init(dev);
1464 		}
1465 		goto out;
1466 	}
1467 
1468 	switch (event) {
1469 	case NETDEV_REGISTER:
1470 		pr_debug("%s: bug\n", __func__);
1471 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1472 		break;
1473 	case NETDEV_UP:
1474 		if (!inetdev_valid_mtu(dev->mtu))
1475 			break;
1476 		if (dev->flags & IFF_LOOPBACK) {
1477 			struct in_ifaddr *ifa = inet_alloc_ifa();
1478 
1479 			if (ifa) {
1480 				INIT_HLIST_NODE(&ifa->hash);
1481 				ifa->ifa_local =
1482 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1483 				ifa->ifa_prefixlen = 8;
1484 				ifa->ifa_mask = inet_make_mask(8);
1485 				in_dev_hold(in_dev);
1486 				ifa->ifa_dev = in_dev;
1487 				ifa->ifa_scope = RT_SCOPE_HOST;
1488 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1489 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1490 						 INFINITY_LIFE_TIME);
1491 				ipv4_devconf_setall(in_dev);
1492 				neigh_parms_data_state_setall(in_dev->arp_parms);
1493 				inet_insert_ifa(ifa);
1494 			}
1495 		}
1496 		ip_mc_up(in_dev);
1497 		/* fall through */
1498 	case NETDEV_CHANGEADDR:
1499 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1500 			break;
1501 		/* fall through */
1502 	case NETDEV_NOTIFY_PEERS:
1503 		/* Send gratuitous ARP to notify of link change */
1504 		inetdev_send_gratuitous_arp(dev, in_dev);
1505 		break;
1506 	case NETDEV_DOWN:
1507 		ip_mc_down(in_dev);
1508 		break;
1509 	case NETDEV_PRE_TYPE_CHANGE:
1510 		ip_mc_unmap(in_dev);
1511 		break;
1512 	case NETDEV_POST_TYPE_CHANGE:
1513 		ip_mc_remap(in_dev);
1514 		break;
1515 	case NETDEV_CHANGEMTU:
1516 		if (inetdev_valid_mtu(dev->mtu))
1517 			break;
1518 		/* disable IP when the MTU is not big enough; fall through */
1519 	case NETDEV_UNREGISTER:
1520 		inetdev_destroy(in_dev);
1521 		break;
1522 	case NETDEV_CHANGENAME:
1523 		/* Do not notify about the label change; this event is
1524 		 * not interesting to applications using netlink.
1525 		 */
1526 		inetdev_changename(dev, in_dev);
1527 
1528 		devinet_sysctl_unregister(in_dev);
1529 		devinet_sysctl_register(in_dev);
1530 		break;
1531 	}
1532 out:
1533 	return NOTIFY_DONE;
1534 }
1535 
1536 static struct notifier_block ip_netdev_notifier = {
1537 	.notifier_call = inetdev_event,
1538 };
1539 
1540 static size_t inet_nlmsg_size(void)
1541 {
1542 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1543 	       + nla_total_size(4) /* IFA_ADDRESS */
1544 	       + nla_total_size(4) /* IFA_LOCAL */
1545 	       + nla_total_size(4) /* IFA_BROADCAST */
1546 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1547 	       + nla_total_size(4)  /* IFA_FLAGS */
1548 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1549 }
1550 
1551 static inline u32 cstamp_delta(unsigned long cstamp)
1552 {
1553 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1554 }
1555 
1556 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1557 			 unsigned long tstamp, u32 preferred, u32 valid)
1558 {
1559 	struct ifa_cacheinfo ci;
1560 
1561 	ci.cstamp = cstamp_delta(cstamp);
1562 	ci.tstamp = cstamp_delta(tstamp);
1563 	ci.ifa_prefered = preferred;
1564 	ci.ifa_valid = valid;
1565 
1566 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1567 }
1568 
1569 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1570 			    u32 portid, u32 seq, int event, unsigned int flags)
1571 {
1572 	struct ifaddrmsg *ifm;
1573 	struct nlmsghdr  *nlh;
1574 	u32 preferred, valid;
1575 
1576 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1577 	if (!nlh)
1578 		return -EMSGSIZE;
1579 
1580 	ifm = nlmsg_data(nlh);
1581 	ifm->ifa_family = AF_INET;
1582 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1583 	ifm->ifa_flags = ifa->ifa_flags;
1584 	ifm->ifa_scope = ifa->ifa_scope;
1585 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1586 
1587 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1588 		preferred = ifa->ifa_preferred_lft;
1589 		valid = ifa->ifa_valid_lft;
1590 		if (preferred != INFINITY_LIFE_TIME) {
1591 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1592 
1593 			if (preferred > tval)
1594 				preferred -= tval;
1595 			else
1596 				preferred = 0;
1597 			if (valid != INFINITY_LIFE_TIME) {
1598 				if (valid > tval)
1599 					valid -= tval;
1600 				else
1601 					valid = 0;
1602 			}
1603 		}
1604 	} else {
1605 		preferred = INFINITY_LIFE_TIME;
1606 		valid = INFINITY_LIFE_TIME;
1607 	}
1608 	if ((ifa->ifa_address &&
1609 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1610 	    (ifa->ifa_local &&
1611 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1612 	    (ifa->ifa_broadcast &&
1613 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1614 	    (ifa->ifa_label[0] &&
1615 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1616 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1617 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1618 			  preferred, valid))
1619 		goto nla_put_failure;
1620 
1621 	nlmsg_end(skb, nlh);
1622 	return 0;
1623 
1624 nla_put_failure:
1625 	nlmsg_cancel(skb, nlh);
1626 	return -EMSGSIZE;
1627 }
1628 
1629 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1630 {
1631 	struct net *net = sock_net(skb->sk);
1632 	int h, s_h;
1633 	int idx, s_idx;
1634 	int ip_idx, s_ip_idx;
1635 	struct net_device *dev;
1636 	struct in_device *in_dev;
1637 	struct in_ifaddr *ifa;
1638 	struct hlist_head *head;
1639 
1640 	s_h = cb->args[0];
1641 	s_idx = idx = cb->args[1];
1642 	s_ip_idx = ip_idx = cb->args[2];
1643 
1644 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1645 		idx = 0;
1646 		head = &net->dev_index_head[h];
1647 		rcu_read_lock();
1648 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1649 			  net->dev_base_seq;
1650 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1651 			if (idx < s_idx)
1652 				goto cont;
1653 			if (h > s_h || idx > s_idx)
1654 				s_ip_idx = 0;
1655 			in_dev = __in_dev_get_rcu(dev);
1656 			if (!in_dev)
1657 				goto cont;
1658 
1659 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1660 			     ifa = ifa->ifa_next, ip_idx++) {
1661 				if (ip_idx < s_ip_idx)
1662 					continue;
1663 				if (inet_fill_ifaddr(skb, ifa,
1664 					     NETLINK_CB(cb->skb).portid,
1665 					     cb->nlh->nlmsg_seq,
1666 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1667 					rcu_read_unlock();
1668 					goto done;
1669 				}
1670 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1671 			}
1672 cont:
1673 			idx++;
1674 		}
1675 		rcu_read_unlock();
1676 	}
1677 
1678 done:
1679 	cb->args[0] = h;
1680 	cb->args[1] = idx;
1681 	cb->args[2] = ip_idx;
1682 
1683 	return skb->len;
1684 }
1685 
1686 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1687 		      u32 portid)
1688 {
1689 	struct sk_buff *skb;
1690 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1691 	int err = -ENOBUFS;
1692 	struct net *net;
1693 
1694 	net = dev_net(ifa->ifa_dev->dev);
1695 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1696 	if (!skb)
1697 		goto errout;
1698 
1699 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1700 	if (err < 0) {
1701 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1702 		WARN_ON(err == -EMSGSIZE);
1703 		kfree_skb(skb);
1704 		goto errout;
1705 	}
1706 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1707 	return;
1708 errout:
1709 	if (err < 0)
1710 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1711 }
1712 
1713 static size_t inet_get_link_af_size(const struct net_device *dev,
1714 				    u32 ext_filter_mask)
1715 {
1716 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1717 
1718 	if (!in_dev)
1719 		return 0;
1720 
1721 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1722 }
1723 
1724 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1725 			     u32 ext_filter_mask)
1726 {
1727 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1728 	struct nlattr *nla;
1729 	int i;
1730 
1731 	if (!in_dev)
1732 		return -ENODATA;
1733 
1734 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1735 	if (!nla)
1736 		return -EMSGSIZE;
1737 
1738 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1739 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1740 
1741 	return 0;
1742 }
1743 
1744 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1745 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1746 };
1747 
1748 static int inet_validate_link_af(const struct net_device *dev,
1749 				 const struct nlattr *nla)
1750 {
1751 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1752 	int err, rem;
1753 
1754 	if (dev && !__in_dev_get_rtnl(dev))
1755 		return -EAFNOSUPPORT;
1756 
1757 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1758 	if (err < 0)
1759 		return err;
1760 
1761 	if (tb[IFLA_INET_CONF]) {
1762 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1763 			int cfgid = nla_type(a);
1764 
1765 			if (nla_len(a) < 4)
1766 				return -EINVAL;
1767 
1768 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1769 				return -EINVAL;
1770 		}
1771 	}
1772 
1773 	return 0;
1774 }
1775 
1776 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1777 {
1778 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1779 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1780 	int rem;
1781 
1782 	if (!in_dev)
1783 		return -EAFNOSUPPORT;
1784 
1785 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1786 		BUG();
1787 
1788 	if (tb[IFLA_INET_CONF]) {
1789 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1790 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1791 	}
1792 
1793 	return 0;
1794 }
1795 
1796 static int inet_netconf_msgsize_devconf(int type)
1797 {
1798 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1799 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1800 	bool all = false;
1801 
1802 	if (type == NETCONFA_ALL)
1803 		all = true;
1804 
1805 	if (all || type == NETCONFA_FORWARDING)
1806 		size += nla_total_size(4);
1807 	if (all || type == NETCONFA_RP_FILTER)
1808 		size += nla_total_size(4);
1809 	if (all || type == NETCONFA_MC_FORWARDING)
1810 		size += nla_total_size(4);
1811 	if (all || type == NETCONFA_PROXY_NEIGH)
1812 		size += nla_total_size(4);
1813 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1814 		size += nla_total_size(4);
1815 
1816 	return size;
1817 }
1818 
1819 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1820 				     struct ipv4_devconf *devconf, u32 portid,
1821 				     u32 seq, int event, unsigned int flags,
1822 				     int type)
1823 {
1824 	struct nlmsghdr  *nlh;
1825 	struct netconfmsg *ncm;
1826 	bool all = false;
1827 
1828 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1829 			flags);
1830 	if (!nlh)
1831 		return -EMSGSIZE;
1832 
1833 	if (type == NETCONFA_ALL)
1834 		all = true;
1835 
1836 	ncm = nlmsg_data(nlh);
1837 	ncm->ncm_family = AF_INET;
1838 
1839 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1840 		goto nla_put_failure;
1841 
1842 	if (!devconf)
1843 		goto out;
1844 
1845 	if ((all || type == NETCONFA_FORWARDING) &&
1846 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1847 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1848 		goto nla_put_failure;
1849 	if ((all || type == NETCONFA_RP_FILTER) &&
1850 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1851 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1852 		goto nla_put_failure;
1853 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1854 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1855 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1856 		goto nla_put_failure;
1857 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1858 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1859 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1860 		goto nla_put_failure;
1861 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1862 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1863 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1864 		goto nla_put_failure;
1865 
1866 out:
1867 	nlmsg_end(skb, nlh);
1868 	return 0;
1869 
1870 nla_put_failure:
1871 	nlmsg_cancel(skb, nlh);
1872 	return -EMSGSIZE;
1873 }
1874 
1875 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1876 				 int ifindex, struct ipv4_devconf *devconf)
1877 {
1878 	struct sk_buff *skb;
1879 	int err = -ENOBUFS;
1880 
1881 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1882 	if (!skb)
1883 		goto errout;
1884 
1885 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1886 					event, 0, type);
1887 	if (err < 0) {
1888 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1889 		WARN_ON(err == -EMSGSIZE);
1890 		kfree_skb(skb);
1891 		goto errout;
1892 	}
1893 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1894 	return;
1895 errout:
1896 	if (err < 0)
1897 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1898 }
1899 
1900 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1901 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1902 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1903 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1904 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1905 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1906 };
1907 
1908 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1909 				    struct nlmsghdr *nlh,
1910 				    struct netlink_ext_ack *extack)
1911 {
1912 	struct net *net = sock_net(in_skb->sk);
1913 	struct nlattr *tb[NETCONFA_MAX+1];
1914 	struct netconfmsg *ncm;
1915 	struct sk_buff *skb;
1916 	struct ipv4_devconf *devconf;
1917 	struct in_device *in_dev;
1918 	struct net_device *dev;
1919 	int ifindex;
1920 	int err;
1921 
1922 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1923 			  devconf_ipv4_policy, extack);
1924 	if (err < 0)
1925 		goto errout;
1926 
1927 	err = -EINVAL;
1928 	if (!tb[NETCONFA_IFINDEX])
1929 		goto errout;
1930 
1931 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1932 	switch (ifindex) {
1933 	case NETCONFA_IFINDEX_ALL:
1934 		devconf = net->ipv4.devconf_all;
1935 		break;
1936 	case NETCONFA_IFINDEX_DEFAULT:
1937 		devconf = net->ipv4.devconf_dflt;
1938 		break;
1939 	default:
1940 		dev = __dev_get_by_index(net, ifindex);
1941 		if (!dev)
1942 			goto errout;
1943 		in_dev = __in_dev_get_rtnl(dev);
1944 		if (!in_dev)
1945 			goto errout;
1946 		devconf = &in_dev->cnf;
1947 		break;
1948 	}
1949 
1950 	err = -ENOBUFS;
1951 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1952 	if (!skb)
1953 		goto errout;
1954 
1955 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1956 					NETLINK_CB(in_skb).portid,
1957 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1958 					NETCONFA_ALL);
1959 	if (err < 0) {
1960 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1961 		WARN_ON(err == -EMSGSIZE);
1962 		kfree_skb(skb);
1963 		goto errout;
1964 	}
1965 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1966 errout:
1967 	return err;
1968 }
1969 
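/*
 * RTM_GETNETCONF dump: walk the per-namespace device index hash and emit
 * one RTM_NEWNETCONF record per in_device, followed by one record each
 * for the "all" and "default" configurations.  cb->args[0]/[1] store the
 * hash bucket and in-bucket index between partial dumps, with
 * NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 reused as states for the
 * two pseudo entries; cb->seq lets userspace detect inconsistent dumps.
 */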
1970 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1971 				     struct netlink_callback *cb)
1972 {
1973 	struct net *net = sock_net(skb->sk);
1974 	int h, s_h;
1975 	int idx, s_idx;
1976 	struct net_device *dev;
1977 	struct in_device *in_dev;
1978 	struct hlist_head *head;
1979 
1980 	s_h = cb->args[0];
1981 	s_idx = idx = cb->args[1];
1982 
1983 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1984 		idx = 0;
1985 		head = &net->dev_index_head[h];
1986 		rcu_read_lock();
1987 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1988 			  net->dev_base_seq;
1989 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1990 			if (idx < s_idx)
1991 				goto cont;
1992 			in_dev = __in_dev_get_rcu(dev);
1993 			if (!in_dev)
1994 				goto cont;
1995 
1996 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1997 						      &in_dev->cnf,
1998 						      NETLINK_CB(cb->skb).portid,
1999 						      cb->nlh->nlmsg_seq,
2000 						      RTM_NEWNETCONF,
2001 						      NLM_F_MULTI,
2002 						      NETCONFA_ALL) < 0) {
2003 				rcu_read_unlock();
2004 				goto done;
2005 			}
2006 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2007 cont:
2008 			idx++;
2009 		}
2010 		rcu_read_unlock();
2011 	}
2012 	if (h == NETDEV_HASHENTRIES) {
2013 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2014 					      net->ipv4.devconf_all,
2015 					      NETLINK_CB(cb->skb).portid,
2016 					      cb->nlh->nlmsg_seq,
2017 					      RTM_NEWNETCONF, NLM_F_MULTI,
2018 					      NETCONFA_ALL) < 0)
2019 			goto done;
2020 		else
2021 			h++;
2022 	}
2023 	if (h == NETDEV_HASHENTRIES + 1) {
2024 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2025 					      net->ipv4.devconf_dflt,
2026 					      NETLINK_CB(cb->skb).portid,
2027 					      cb->nlh->nlmsg_seq,
2028 					      RTM_NEWNETCONF, NLM_F_MULTI,
2029 					      NETCONFA_ALL) < 0)
2030 			goto done;
2031 		else
2032 			h++;
2033 	}
2034 done:
2035 	cb->args[0] = h;
2036 	cb->args[1] = idx;
2037 
2038 	return skb->len;
2039 }
2040 
2041 #ifdef CONFIG_SYSCTL
2042 
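/*
 * Propagate a change of the "default" configuration to every in_device
 * that has not explicitly overridden entry @i (the per-entry override is
 * tracked in the cnf.state bitmap, set by devinet_conf_proc()).
 */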
2043 static void devinet_copy_dflt_conf(struct net *net, int i)
2044 {
2045 	struct net_device *dev;
2046 
2047 	rcu_read_lock();
2048 	for_each_netdev_rcu(net, dev) {
2049 		struct in_device *in_dev;
2050 
2051 		in_dev = __in_dev_get_rcu(dev);
2052 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2053 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2054 	}
2055 	rcu_read_unlock();
2056 }
2057 
2058 /* called with RTNL locked */
2059 static void inet_forward_change(struct net *net)
2060 {
2061 	struct net_device *dev;
2062 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2063 
2064 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2065 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2066 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2067 				    NETCONFA_FORWARDING,
2068 				    NETCONFA_IFINDEX_ALL,
2069 				    net->ipv4.devconf_all);
2070 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2071 				    NETCONFA_FORWARDING,
2072 				    NETCONFA_IFINDEX_DEFAULT,
2073 				    net->ipv4.devconf_dflt);
2074 
2075 	for_each_netdev(net, dev) {
2076 		struct in_device *in_dev;
2077 
2078 		if (on)
2079 			dev_disable_lro(dev);
2080 
2081 		in_dev = __in_dev_get_rtnl(dev);
2082 		if (in_dev) {
2083 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2084 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2085 						    NETCONFA_FORWARDING,
2086 						    dev->ifindex, &in_dev->cnf);
2087 		}
2088 	}
2089 }
2090 
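/*
 * Map a devconf block back to the ifindex used in netconf notifications:
 * the "all"/"default" pseudo indices, or the owning device's ifindex.
 */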
2091 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2092 {
2093 	if (cnf == net->ipv4.devconf_dflt)
2094 		return NETCONFA_IFINDEX_DEFAULT;
2095 	else if (cnf == net->ipv4.devconf_all)
2096 		return NETCONFA_IFINDEX_ALL;
2097 	else {
2098 		struct in_device *idev
2099 			= container_of(cnf, struct in_device, cnf);
2100 		return idev->dev->ifindex;
2101 	}
2102 }
2103 
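/*
 * Generic handler for the writable entries under /proc/sys/net/ipv4/conf/.
 * On a successful write it marks the entry as explicitly set, copies
 * "default" changes to devices still using the default, flushes the
 * route cache when accept_local or route_localnet is switched off, and
 * sends netconf notifications for rp_filter, proxy_arp and
 * ignore_routes_with_linkdown changes.
 */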
2104 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2105 			     void __user *buffer,
2106 			     size_t *lenp, loff_t *ppos)
2107 {
2108 	int old_value = *(int *)ctl->data;
2109 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2110 	int new_value = *(int *)ctl->data;
2111 
2112 	if (write) {
2113 		struct ipv4_devconf *cnf = ctl->extra1;
2114 		struct net *net = ctl->extra2;
2115 		int i = (int *)ctl->data - cnf->data;
2116 		int ifindex;
2117 
2118 		set_bit(i, cnf->state);
2119 
2120 		if (cnf == net->ipv4.devconf_dflt)
2121 			devinet_copy_dflt_conf(net, i);
2122 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2123 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2124 			if ((new_value == 0) && (old_value != 0))
2125 				rt_cache_flush(net);
2126 
2127 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2128 		    new_value != old_value) {
2129 			ifindex = devinet_conf_ifindex(net, cnf);
2130 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2131 						    NETCONFA_RP_FILTER,
2132 						    ifindex, cnf);
2133 		}
2134 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2135 		    new_value != old_value) {
2136 			ifindex = devinet_conf_ifindex(net, cnf);
2137 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2138 						    NETCONFA_PROXY_NEIGH,
2139 						    ifindex, cnf);
2140 		}
2141 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2142 		    new_value != old_value) {
2143 			ifindex = devinet_conf_ifindex(net, cnf);
2144 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2145 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2146 						    ifindex, cnf);
2147 		}
2148 	}
2149 
2150 	return ret;
2151 }
2152 
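/*
 * Handler for the "forwarding" entries and the legacy
 * /proc/sys/net/ipv4/ip_forward alias.  Any change except to the
 * "default" value must run under RTNL; if the lock cannot be taken the
 * written value is rolled back and the syscall restarted.  Writes to
 * "all" fan out to every device via inet_forward_change().  A typical
 * trigger from userspace (assuming the sysctl(8) utility) is:
 *
 *	sysctl -w net.ipv4.ip_forward=1
 */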
2153 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2154 				  void __user *buffer,
2155 				  size_t *lenp, loff_t *ppos)
2156 {
2157 	int *valp = ctl->data;
2158 	int val = *valp;
2159 	loff_t pos = *ppos;
2160 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2161 
2162 	if (write && *valp != val) {
2163 		struct net *net = ctl->extra2;
2164 
2165 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2166 			if (!rtnl_trylock()) {
2167 				/* Restore the original values before restarting */
2168 				*valp = val;
2169 				*ppos = pos;
2170 				return restart_syscall();
2171 			}
2172 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2173 				inet_forward_change(net);
2174 			} else {
2175 				struct ipv4_devconf *cnf = ctl->extra1;
2176 				struct in_device *idev =
2177 					container_of(cnf, struct in_device, cnf);
2178 				if (*valp)
2179 					dev_disable_lro(idev->dev);
2180 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2181 							    NETCONFA_FORWARDING,
2182 							    idev->dev->ifindex,
2183 							    cnf);
2184 			}
2185 			rtnl_unlock();
2186 			rt_cache_flush(net);
2187 		} else
2188 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2189 						    NETCONFA_FORWARDING,
2190 						    NETCONFA_IFINDEX_DEFAULT,
2191 						    net->ipv4.devconf_dflt);
2192 	}
2193 
2194 	return ret;
2195 }
2196 
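/*
 * Like proc_dointvec(), but flush the IPv4 routing cache whenever the
 * written value actually changed.
 */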
2197 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2198 				void __user *buffer,
2199 				size_t *lenp, loff_t *ppos)
2200 {
2201 	int *valp = ctl->data;
2202 	int val = *valp;
2203 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2204 	struct net *net = ctl->extra2;
2205 
2206 	if (write && *valp != val)
2207 		rt_cache_flush(net);
2208 
2209 	return ret;
2210 }
2211 
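/*
 * Helpers for the template table below.  Each entry initially points
 * into the static ipv4_devconf; __devinet_sysctl_register() rebases
 * .data/.extra1/.extra2 onto the per-device (or per-namespace) copy
 * before registering the table under net/ipv4/conf/<name>.
 */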
2212 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2213 	{ \
2214 		.procname	= name, \
2215 		.data		= ipv4_devconf.data + \
2216 				  IPV4_DEVCONF_ ## attr - 1, \
2217 		.maxlen		= sizeof(int), \
2218 		.mode		= mval, \
2219 		.proc_handler	= proc, \
2220 		.extra1		= &ipv4_devconf, \
2221 	}
2222 
2223 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2224 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2225 
2226 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2227 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2228 
2229 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2230 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2231 
2232 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2233 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2234 
2235 static struct devinet_sysctl_table {
2236 	struct ctl_table_header *sysctl_header;
2237 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2238 } devinet_sysctl = {
2239 	.devinet_vars = {
2240 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2241 					     devinet_sysctl_forward),
2242 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2243 
2244 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2245 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2246 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2247 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2248 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2249 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2250 					"accept_source_route"),
2251 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2252 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2253 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2254 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2255 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2256 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2257 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2258 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2259 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2260 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2261 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2262 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2263 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2264 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2265 					"force_igmp_version"),
2266 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2267 					"igmpv2_unsolicited_report_interval"),
2268 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2269 					"igmpv3_unsolicited_report_interval"),
2270 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2271 					"ignore_routes_with_linkdown"),
2272 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2273 					"drop_gratuitous_arp"),
2274 
2275 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2276 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2277 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2278 					      "promote_secondaries"),
2279 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2280 					      "route_localnet"),
2281 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2282 					      "drop_unicast_in_l2_multicast"),
2283 	},
2284 };
2285 
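/*
 * Register one net/ipv4/conf/<dev_name> sysctl directory backed by @p
 * and announce its full configuration over netlink.  As a sketch, for a
 * hypothetical device named "eth0" this creates entries such as:
 *
 *	/proc/sys/net/ipv4/conf/eth0/forwarding
 *	/proc/sys/net/ipv4/conf/eth0/rp_filter
 */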
2286 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2287 				     int ifindex, struct ipv4_devconf *p)
2288 {
2289 	int i;
2290 	struct devinet_sysctl_table *t;
2291 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2292 
2293 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2294 	if (!t)
2295 		goto out;
2296 
2297 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2298 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2299 		t->devinet_vars[i].extra1 = p;
2300 		t->devinet_vars[i].extra2 = net;
2301 	}
2302 
2303 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2304 
2305 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2306 	if (!t->sysctl_header)
2307 		goto free;
2308 
2309 	p->sysctl = t;
2310 
2311 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2312 				    ifindex, p);
2313 	return 0;
2314 
2315 free:
2316 	kfree(t);
2317 out:
2318 	return -ENOBUFS;
2319 }
2320 
2321 static void __devinet_sysctl_unregister(struct net *net,
2322 					struct ipv4_devconf *cnf, int ifindex)
2323 {
2324 	struct devinet_sysctl_table *t = cnf->sysctl;
2325 
2326 	if (t) {
2327 		cnf->sysctl = NULL;
2328 		unregister_net_sysctl_table(t->sysctl_header);
2329 		kfree(t);
2330 	}
2331 
2332 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2333 }
2334 
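/*
 * Per-device registration: the neighbour (ARP) sysctls first, then the
 * net/ipv4/conf/<dev> tree; unwound in reverse order on failure.
 */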
2335 static int devinet_sysctl_register(struct in_device *idev)
2336 {
2337 	int err;
2338 
2339 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2340 		return -EINVAL;
2341 
2342 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2343 	if (err)
2344 		return err;
2345 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2346 					idev->dev->ifindex, &idev->cnf);
2347 	if (err)
2348 		neigh_sysctl_unregister(idev->arp_parms);
2349 	return err;
2350 }
2351 
2352 static void devinet_sysctl_unregister(struct in_device *idev)
2353 {
2354 	struct net *net = dev_net(idev->dev);
2355 
2356 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2357 	neigh_sysctl_unregister(idev->arp_parms);
2358 }
2359 
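/*
 * Legacy /proc/sys/net/ipv4/ip_forward entry; it aliases
 * conf/all/forwarding and is rebased onto each namespace's private copy
 * in devinet_init_net().
 */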
2360 static struct ctl_table ctl_forward_entry[] = {
2361 	{
2362 		.procname	= "ip_forward",
2363 		.data		= &ipv4_devconf.data[
2364 					IPV4_DEVCONF_FORWARDING - 1],
2365 		.maxlen		= sizeof(int),
2366 		.mode		= 0644,
2367 		.proc_handler	= devinet_sysctl_forward,
2368 		.extra1		= &ipv4_devconf,
2369 		.extra2		= &init_net,
2370 	},
2371 	{ },
2372 };
2373 #endif
2374 
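/*
 * Per-namespace init: init_net uses the static ipv4_devconf templates
 * directly, while every other namespace gets private copies of "all",
 * "default" and the ip_forward ctl_table so that its sysctls and
 * netconf state stay isolated.
 */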
2375 static __net_init int devinet_init_net(struct net *net)
2376 {
2377 	int err;
2378 	struct ipv4_devconf *all, *dflt;
2379 #ifdef CONFIG_SYSCTL
2380 	struct ctl_table *tbl = ctl_forward_entry;
2381 	struct ctl_table_header *forw_hdr;
2382 #endif
2383 
2384 	err = -ENOMEM;
2385 	all = &ipv4_devconf;
2386 	dflt = &ipv4_devconf_dflt;
2387 
2388 	if (!net_eq(net, &init_net)) {
2389 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2390 		if (!all)
2391 			goto err_alloc_all;
2392 
2393 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2394 		if (!dflt)
2395 			goto err_alloc_dflt;
2396 
2397 #ifdef CONFIG_SYSCTL
2398 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2399 		if (!tbl)
2400 			goto err_alloc_ctl;
2401 
2402 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2403 		tbl[0].extra1 = all;
2404 		tbl[0].extra2 = net;
2405 #endif
2406 	}
2407 
2408 #ifdef CONFIG_SYSCTL
2409 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2410 	if (err < 0)
2411 		goto err_reg_all;
2412 
2413 	err = __devinet_sysctl_register(net, "default",
2414 					NETCONFA_IFINDEX_DEFAULT, dflt);
2415 	if (err < 0)
2416 		goto err_reg_dflt;
2417 
2418 	err = -ENOMEM;
2419 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2420 	if (!forw_hdr)
2421 		goto err_reg_ctl;
2422 	net->ipv4.forw_hdr = forw_hdr;
2423 #endif
2424 
2425 	net->ipv4.devconf_all = all;
2426 	net->ipv4.devconf_dflt = dflt;
2427 	return 0;
2428 
2429 #ifdef CONFIG_SYSCTL
2430 err_reg_ctl:
2431 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2432 err_reg_dflt:
2433 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2434 err_reg_all:
2435 	if (tbl != ctl_forward_entry)
2436 		kfree(tbl);
2437 err_alloc_ctl:
2438 #endif
2439 	if (dflt != &ipv4_devconf_dflt)
2440 		kfree(dflt);
2441 err_alloc_dflt:
2442 	if (all != &ipv4_devconf)
2443 		kfree(all);
2444 err_alloc_all:
2445 	return err;
2446 }
2447 
2448 static __net_exit void devinet_exit_net(struct net *net)
2449 {
2450 #ifdef CONFIG_SYSCTL
2451 	struct ctl_table *tbl;
2452 
2453 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2454 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2455 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2456 				    NETCONFA_IFINDEX_DEFAULT);
2457 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2458 				    NETCONFA_IFINDEX_ALL);
2459 	kfree(tbl);
2460 #endif
2461 	kfree(net->ipv4.devconf_dflt);
2462 	kfree(net->ipv4.devconf_all);
2463 }
2464 
2465 static __net_initdata struct pernet_operations devinet_ops = {
2466 	.init = devinet_init_net,
2467 	.exit = devinet_exit_net,
2468 };
2469 
2470 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2471 	.family		  = AF_INET,
2472 	.fill_link_af	  = inet_fill_link_af,
2473 	.get_link_af_size = inet_get_link_af_size,
2474 	.validate_link_af = inet_validate_link_af,
2475 	.set_link_af	  = inet_set_link_af,
2476 };
2477 
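/*
 * Boot-time setup: the address hash table, per-namespace ops, the
 * SIOCGIFCONF helper, the netdevice notifier, the address-lifetime
 * worker and the PF_INET rtnetlink address/netconf handlers.
 */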
2478 void __init devinet_init(void)
2479 {
2480 	int i;
2481 
2482 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2483 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2484 
2485 	register_pernet_subsys(&devinet_ops);
2486 
2487 	register_gifconf(PF_INET, inet_gifconf);
2488 	register_netdevice_notifier(&ip_netdev_notifier);
2489 
2490 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2491 
2492 	rtnl_af_register(&inet_af_ops);
2493 
2494 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2495 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2496 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2497 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2498 		      inet_netconf_dump_devconf, 0);
2499 }
2500