xref: /openbmc/linux/net/ipv4/devinet.c (revision e2a06704)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126 	ASSERT_RTNL();
127 	hlist_del_init_rcu(&ifa->hash);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it must be protected by RCU or RTNL.
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	ifa = inet_lookup_ifaddr_rcu(net, addr);
145 	if (!ifa) {
146 		struct flowi4 fl4 = { .daddr = addr };
147 		struct fib_result res = { 0 };
148 		struct fib_table *local;
149 
150 		/* Fallback to FIB local table so that communication
151 		 * over loopback subnets works.
152 		 */
153 		local = fib_get_table(net, RT_TABLE_LOCAL);
154 		if (local &&
155 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
156 		    res.type == RTN_LOCAL)
157 			result = FIB_RES_DEV(res);
158 	} else {
159 		result = ifa->ifa_dev->dev;
160 	}
161 	if (result && devref)
162 		dev_hold(result);
163 	rcu_read_unlock();
164 	return result;
165 }
166 EXPORT_SYMBOL(__ip_dev_find);
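
/* Illustrative sketch (editorial, not part of the original file): a
 * hypothetical caller resolving the device that owns a local source
 * address.  With devref = true a reference is taken, so the caller must
 * drop it with dev_put() when done.
 */
static int example_find_src_dev(struct net *net, __be32 saddr)
{
	struct net_device *dev = __ip_dev_find(net, saddr, true);

	if (!dev)
		return -ENODEV;
	/* ... use dev ... */
	dev_put(dev);		/* drop the reference taken above */
	return 0;
}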
167 
168 /* called under RCU lock */
169 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170 {
171 	u32 hash = inet_addr_hash(net, addr);
172 	struct in_ifaddr *ifa;
173 
174 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175 		if (ifa->ifa_local == addr &&
176 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
177 			return ifa;
178 
179 	return NULL;
180 }
181 
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183 
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
186 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
187 			 int destroy);
188 #ifdef CONFIG_SYSCTL
189 static int devinet_sysctl_register(struct in_device *idev);
190 static void devinet_sysctl_unregister(struct in_device *idev);
191 #else
192 static int devinet_sysctl_register(struct in_device *idev)
193 {
194 	return 0;
195 }
196 static void devinet_sysctl_unregister(struct in_device *idev)
197 {
198 }
199 #endif
200 
201 /* Locks all the inet devices. */
202 
203 static struct in_ifaddr *inet_alloc_ifa(void)
204 {
205 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
206 }
207 
208 static void inet_rcu_free_ifa(struct rcu_head *head)
209 {
210 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
211 	if (ifa->ifa_dev)
212 		in_dev_put(ifa->ifa_dev);
213 	kfree(ifa);
214 }
215 
216 static void inet_free_ifa(struct in_ifaddr *ifa)
217 {
218 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
219 }
220 
221 void in_dev_finish_destroy(struct in_device *idev)
222 {
223 	struct net_device *dev = idev->dev;
224 
225 	WARN_ON(idev->ifa_list);
226 	WARN_ON(idev->mc_list);
227 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
228 #ifdef NET_REFCNT_DEBUG
229 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
230 #endif
231 	dev_put(dev);
232 	if (!idev->dead)
233 		pr_err("Freeing alive in_device %p\n", idev);
234 	else
235 		kfree(idev);
236 }
237 EXPORT_SYMBOL(in_dev_finish_destroy);
238 
239 static struct in_device *inetdev_init(struct net_device *dev)
240 {
241 	struct in_device *in_dev;
242 	int err = -ENOMEM;
243 
244 	ASSERT_RTNL();
245 
246 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
247 	if (!in_dev)
248 		goto out;
249 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
250 			sizeof(in_dev->cnf));
251 	in_dev->cnf.sysctl = NULL;
252 	in_dev->dev = dev;
253 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
254 	if (!in_dev->arp_parms)
255 		goto out_kfree;
256 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
257 		dev_disable_lro(dev);
258 	/* Reference in_dev->dev */
259 	dev_hold(dev);
260 	/* Account for reference dev->ip_ptr (below) */
261 	refcount_set(&in_dev->refcnt, 1);
262 
263 	err = devinet_sysctl_register(in_dev);
264 	if (err) {
265 		in_dev->dead = 1;
266 		in_dev_put(in_dev);
267 		in_dev = NULL;
268 		goto out;
269 	}
270 	ip_mc_init_dev(in_dev);
271 	if (dev->flags & IFF_UP)
272 		ip_mc_up(in_dev);
273 
274 	/* we can receive as soon as ip_ptr is set -- do this last */
275 	rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277 	return in_dev ?: ERR_PTR(err);
278 out_kfree:
279 	kfree(in_dev);
280 	in_dev = NULL;
281 	goto out;
282 }
283 
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
287 	in_dev_put(idev);
288 }
289 
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292 	struct in_ifaddr *ifa;
293 	struct net_device *dev;
294 
295 	ASSERT_RTNL();
296 
297 	dev = in_dev->dev;
298 
299 	in_dev->dead = 1;
300 
301 	ip_mc_destroy_dev(in_dev);
302 
303 	while ((ifa = in_dev->ifa_list) != NULL) {
304 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305 		inet_free_ifa(ifa);
306 	}
307 
308 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
309 
310 	devinet_sysctl_unregister(in_dev);
311 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312 	arp_ifdown(dev);
313 
314 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316 
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319 	rcu_read_lock();
320 	for_primary_ifa(in_dev) {
321 		if (inet_ifa_match(a, ifa)) {
322 			if (!b || inet_ifa_match(b, ifa)) {
323 				rcu_read_unlock();
324 				return 1;
325 			}
326 		}
327 	} endfor_ifa(in_dev);
328 	rcu_read_unlock();
329 	return 0;
330 }
331 
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 			 int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335 	struct in_ifaddr *promote = NULL;
336 	struct in_ifaddr *ifa, *ifa1 = *ifap;
337 	struct in_ifaddr *last_prim = in_dev->ifa_list;
338 	struct in_ifaddr *prev_prom = NULL;
339 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340 
341 	ASSERT_RTNL();
342 
343 	if (in_dev->dead)
344 		goto no_promotions;
345 
346 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
347 	 * unless alias promotion is set.
348 	 */
349 
350 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352 
353 		while ((ifa = *ifap1) != NULL) {
354 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355 			    ifa1->ifa_scope <= ifa->ifa_scope)
356 				last_prim = ifa;
357 
358 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359 			    ifa1->ifa_mask != ifa->ifa_mask ||
360 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
361 				ifap1 = &ifa->ifa_next;
362 				prev_prom = ifa;
363 				continue;
364 			}
365 
366 			if (!do_promote) {
367 				inet_hash_remove(ifa);
368 				*ifap1 = ifa->ifa_next;
369 
370 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371 				blocking_notifier_call_chain(&inetaddr_chain,
372 						NETDEV_DOWN, ifa);
373 				inet_free_ifa(ifa);
374 			} else {
375 				promote = ifa;
376 				break;
377 			}
378 		}
379 	}
380 
381 	/* On promotion all secondaries from the subnet change
382 	 * the primary IP; we must remove all their routes silently
383 	 * and later add them back with the new prefsrc. Do this
384 	 * while all addresses are on the device list.
385 	 */
386 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387 		if (ifa1->ifa_mask == ifa->ifa_mask &&
388 		    inet_ifa_match(ifa1->ifa_address, ifa))
389 			fib_del_ifaddr(ifa, ifa1);
390 	}
391 
392 no_promotions:
393 	/* 2. Unlink it */
394 
395 	*ifap = ifa1->ifa_next;
396 	inet_hash_remove(ifa1);
397 
398 	/* 3. Announce address deletion */
399 
400 	/* Send the message first, then call the notifier.
401 	   At first sight, the FIB update triggered by the notifier
402 	   will refer to an already deleted ifaddr, which could confuse
403 	   netlink listeners. In fact it will not: gated sees
404 	   that the route was deleted and, if it still thinks the ifaddr
405 	   is valid, it will try to restore the deleted routes... Grr.
406 	   So this order is correct.
407 	 */
408 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410 
411 	if (promote) {
412 		struct in_ifaddr *next_sec = promote->ifa_next;
413 
414 		if (prev_prom) {
415 			prev_prom->ifa_next = promote->ifa_next;
416 			promote->ifa_next = last_prim->ifa_next;
417 			last_prim->ifa_next = promote;
418 		}
419 
420 		promote->ifa_flags &= ~IFA_F_SECONDARY;
421 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422 		blocking_notifier_call_chain(&inetaddr_chain,
423 				NETDEV_UP, promote);
424 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425 			if (ifa1->ifa_mask != ifa->ifa_mask ||
426 			    !inet_ifa_match(ifa1->ifa_address, ifa))
427 					continue;
428 			fib_add_ifaddr(ifa);
429 		}
430 
431 	}
432 	if (destroy)
433 		inet_free_ifa(ifa1);
434 }
435 
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437 			 int destroy)
438 {
439 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441 
442 static void check_lifetime(struct work_struct *work);
443 
444 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445 
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447 			     u32 portid, struct netlink_ext_ack *extack)
448 {
449 	struct in_device *in_dev = ifa->ifa_dev;
450 	struct in_ifaddr *ifa1, **ifap, **last_primary;
451 	struct in_validator_info ivi;
452 	int ret;
453 
454 	ASSERT_RTNL();
455 
456 	if (!ifa->ifa_local) {
457 		inet_free_ifa(ifa);
458 		return 0;
459 	}
460 
461 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
462 	last_primary = &in_dev->ifa_list;
463 
464 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
465 	     ifap = &ifa1->ifa_next) {
466 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
467 		    ifa->ifa_scope <= ifa1->ifa_scope)
468 			last_primary = &ifa1->ifa_next;
469 		if (ifa1->ifa_mask == ifa->ifa_mask &&
470 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
471 			if (ifa1->ifa_local == ifa->ifa_local) {
472 				inet_free_ifa(ifa);
473 				return -EEXIST;
474 			}
475 			if (ifa1->ifa_scope != ifa->ifa_scope) {
476 				inet_free_ifa(ifa);
477 				return -EINVAL;
478 			}
479 			ifa->ifa_flags |= IFA_F_SECONDARY;
480 		}
481 	}
482 
483 	/* Allow any devices that wish to register ifaddr validators to weigh
484 	 * in now, before changes are committed.  The rtnl lock is serializing
485 	 * access here, so the state should not change between a validator call
486 	 * and a final notify on commit.  This isn't invoked on promotion under
487 	 * the assumption that validators are checking the address itself, and
488 	 * not the flags.
489 	 */
490 	ivi.ivi_addr = ifa->ifa_address;
491 	ivi.ivi_dev = ifa->ifa_dev;
492 	ivi.extack = extack;
493 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
494 					   NETDEV_UP, &ivi);
495 	ret = notifier_to_errno(ret);
496 	if (ret) {
497 		inet_free_ifa(ifa);
498 		return ret;
499 	}
500 
501 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
502 		prandom_seed((__force u32) ifa->ifa_local);
503 		ifap = last_primary;
504 	}
505 
506 	ifa->ifa_next = *ifap;
507 	*ifap = ifa;
508 
509 	inet_hash_insert(dev_net(in_dev->dev), ifa);
510 
511 	cancel_delayed_work(&check_lifetime_work);
512 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
513 
514 	/* Send the message first, then call the notifier.
515 	   The notifier will trigger the FIB update, so that
516 	   netlink listeners will know about the new ifaddr */
517 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
518 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
519 
520 	return 0;
521 }
522 
523 static int inet_insert_ifa(struct in_ifaddr *ifa)
524 {
525 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
526 }
527 
528 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
529 {
530 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
531 
532 	ASSERT_RTNL();
533 
534 	if (!in_dev) {
535 		inet_free_ifa(ifa);
536 		return -ENOBUFS;
537 	}
538 	ipv4_devconf_setall(in_dev);
539 	neigh_parms_data_state_setall(in_dev->arp_parms);
540 	if (ifa->ifa_dev != in_dev) {
541 		WARN_ON(ifa->ifa_dev);
542 		in_dev_hold(in_dev);
543 		ifa->ifa_dev = in_dev;
544 	}
545 	if (ipv4_is_loopback(ifa->ifa_local))
546 		ifa->ifa_scope = RT_SCOPE_HOST;
547 	return inet_insert_ifa(ifa);
548 }
549 
550 /* Caller must hold RCU or RTNL:
551  * we don't take a reference on the found in_device.
552  */
553 struct in_device *inetdev_by_index(struct net *net, int ifindex)
554 {
555 	struct net_device *dev;
556 	struct in_device *in_dev = NULL;
557 
558 	rcu_read_lock();
559 	dev = dev_get_by_index_rcu(net, ifindex);
560 	if (dev)
561 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
562 	rcu_read_unlock();
563 	return in_dev;
564 }
565 EXPORT_SYMBOL(inetdev_by_index);
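
/* Illustrative sketch (editorial, not part of the original file): since
 * inetdev_by_index() takes no reference on the returned in_device, a
 * hypothetical caller must stay inside RCU (or hold RTNL) while using it.
 */
static bool example_forwarding_enabled(struct net *net, int ifindex)
{
	struct in_device *in_dev;
	bool fwd = false;

	rcu_read_lock();
	in_dev = inetdev_by_index(net, ifindex);
	if (in_dev)
		fwd = IN_DEV_FORWARD(in_dev);
	rcu_read_unlock();

	return fwd;
}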
566 
567 /* Called only from RTNL-locked context. No other locks taken. */
568 
569 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
570 				    __be32 mask)
571 {
572 	ASSERT_RTNL();
573 
574 	for_primary_ifa(in_dev) {
575 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
576 			return ifa;
577 	} endfor_ifa(in_dev);
578 	return NULL;
579 }
580 
581 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
582 {
583 	struct ip_mreqn mreq = {
584 		.imr_multiaddr.s_addr = ifa->ifa_address,
585 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
586 	};
587 	int ret;
588 
589 	ASSERT_RTNL();
590 
591 	lock_sock(sk);
592 	if (join)
593 		ret = ip_mc_join_group(sk, &mreq);
594 	else
595 		ret = ip_mc_leave_group(sk, &mreq);
596 	release_sock(sk);
597 
598 	return ret;
599 }
600 
601 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
602 			    struct netlink_ext_ack *extack)
603 {
604 	struct net *net = sock_net(skb->sk);
605 	struct nlattr *tb[IFA_MAX+1];
606 	struct in_device *in_dev;
607 	struct ifaddrmsg *ifm;
608 	struct in_ifaddr *ifa, **ifap;
609 	int err = -EINVAL;
610 
611 	ASSERT_RTNL();
612 
613 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
614 			  extack);
615 	if (err < 0)
616 		goto errout;
617 
618 	ifm = nlmsg_data(nlh);
619 	in_dev = inetdev_by_index(net, ifm->ifa_index);
620 	if (!in_dev) {
621 		err = -ENODEV;
622 		goto errout;
623 	}
624 
625 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
626 	     ifap = &ifa->ifa_next) {
627 		if (tb[IFA_LOCAL] &&
628 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
629 			continue;
630 
631 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
632 			continue;
633 
634 		if (tb[IFA_ADDRESS] &&
635 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
636 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
637 			continue;
638 
639 		if (ipv4_is_multicast(ifa->ifa_address))
640 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
641 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
642 		return 0;
643 	}
644 
645 	err = -EADDRNOTAVAIL;
646 errout:
647 	return err;
648 }
649 
650 #define INFINITY_LIFE_TIME	0xFFFFFFFF
651 
652 static void check_lifetime(struct work_struct *work)
653 {
654 	unsigned long now, next, next_sec, next_sched;
655 	struct in_ifaddr *ifa;
656 	struct hlist_node *n;
657 	int i;
658 
659 	now = jiffies;
660 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
661 
662 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
663 		bool change_needed = false;
664 
665 		rcu_read_lock();
666 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
667 			unsigned long age;
668 
669 			if (ifa->ifa_flags & IFA_F_PERMANENT)
670 				continue;
671 
672 			/* We try to batch several events at once. */
673 			age = (now - ifa->ifa_tstamp +
674 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
675 
676 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
677 			    age >= ifa->ifa_valid_lft) {
678 				change_needed = true;
679 			} else if (ifa->ifa_preferred_lft ==
680 				   INFINITY_LIFE_TIME) {
681 				continue;
682 			} else if (age >= ifa->ifa_preferred_lft) {
683 				if (time_before(ifa->ifa_tstamp +
684 						ifa->ifa_valid_lft * HZ, next))
685 					next = ifa->ifa_tstamp +
686 					       ifa->ifa_valid_lft * HZ;
687 
688 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
689 					change_needed = true;
690 			} else if (time_before(ifa->ifa_tstamp +
691 					       ifa->ifa_preferred_lft * HZ,
692 					       next)) {
693 				next = ifa->ifa_tstamp +
694 				       ifa->ifa_preferred_lft * HZ;
695 			}
696 		}
697 		rcu_read_unlock();
698 		if (!change_needed)
699 			continue;
700 		rtnl_lock();
701 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
702 			unsigned long age;
703 
704 			if (ifa->ifa_flags & IFA_F_PERMANENT)
705 				continue;
706 
707 			/* We try to batch several events at once. */
708 			age = (now - ifa->ifa_tstamp +
709 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
710 
711 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
712 			    age >= ifa->ifa_valid_lft) {
713 				struct in_ifaddr **ifap;
714 
715 				for (ifap = &ifa->ifa_dev->ifa_list;
716 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
717 					if (*ifap == ifa) {
718 						inet_del_ifa(ifa->ifa_dev,
719 							     ifap, 1);
720 						break;
721 					}
722 				}
723 			} else if (ifa->ifa_preferred_lft !=
724 				   INFINITY_LIFE_TIME &&
725 				   age >= ifa->ifa_preferred_lft &&
726 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
727 				ifa->ifa_flags |= IFA_F_DEPRECATED;
728 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
729 			}
730 		}
731 		rtnl_unlock();
732 	}
733 
734 	next_sec = round_jiffies_up(next);
735 	next_sched = next;
736 
737 	/* If rounded timeout is accurate enough, accept it. */
738 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
739 		next_sched = next_sec;
740 
741 	now = jiffies;
742 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
743 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
744 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
745 
746 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
747 			next_sched - now);
748 }
749 
750 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
751 			     __u32 prefered_lft)
752 {
753 	unsigned long timeout;
754 
755 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
756 
757 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
758 	if (addrconf_finite_timeout(timeout))
759 		ifa->ifa_valid_lft = timeout;
760 	else
761 		ifa->ifa_flags |= IFA_F_PERMANENT;
762 
763 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
764 	if (addrconf_finite_timeout(timeout)) {
765 		if (timeout == 0)
766 			ifa->ifa_flags |= IFA_F_DEPRECATED;
767 		ifa->ifa_preferred_lft = timeout;
768 	}
769 	ifa->ifa_tstamp = jiffies;
770 	if (!ifa->ifa_cstamp)
771 		ifa->ifa_cstamp = ifa->ifa_tstamp;
772 }
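
/* Worked example (editorial note): with valid_lft = 3600 and
 * prefered_lft = 1800, neither IFA_F_PERMANENT nor IFA_F_DEPRECATED is
 * set here; check_lifetime() later marks the address IFA_F_DEPRECATED
 * once it is ~1800s old and deletes it at ~3600s.  Passing
 * INFINITY_LIFE_TIME (0xFFFFFFFF) as valid_lft marks the address
 * IFA_F_PERMANENT instead.
 */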
773 
774 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
775 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
776 {
777 	struct nlattr *tb[IFA_MAX+1];
778 	struct in_ifaddr *ifa;
779 	struct ifaddrmsg *ifm;
780 	struct net_device *dev;
781 	struct in_device *in_dev;
782 	int err;
783 
784 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
785 			  NULL);
786 	if (err < 0)
787 		goto errout;
788 
789 	ifm = nlmsg_data(nlh);
790 	err = -EINVAL;
791 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
792 		goto errout;
793 
794 	dev = __dev_get_by_index(net, ifm->ifa_index);
795 	err = -ENODEV;
796 	if (!dev)
797 		goto errout;
798 
799 	in_dev = __in_dev_get_rtnl(dev);
800 	err = -ENOBUFS;
801 	if (!in_dev)
802 		goto errout;
803 
804 	ifa = inet_alloc_ifa();
805 	if (!ifa)
806 		/*
807 		 * A potential in_dev allocation can be left alive; it stays
808 		 * assigned to its device and is destroyed with it.
809 		 */
810 		goto errout;
811 
812 	ipv4_devconf_setall(in_dev);
813 	neigh_parms_data_state_setall(in_dev->arp_parms);
814 	in_dev_hold(in_dev);
815 
816 	if (!tb[IFA_ADDRESS])
817 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
818 
819 	INIT_HLIST_NODE(&ifa->hash);
820 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
821 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
822 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
823 					 ifm->ifa_flags;
824 	ifa->ifa_scope = ifm->ifa_scope;
825 	ifa->ifa_dev = in_dev;
826 
827 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
828 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
829 
830 	if (tb[IFA_BROADCAST])
831 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
832 
833 	if (tb[IFA_LABEL])
834 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
835 	else
836 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837 
838 	if (tb[IFA_CACHEINFO]) {
839 		struct ifa_cacheinfo *ci;
840 
841 		ci = nla_data(tb[IFA_CACHEINFO]);
842 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
843 			err = -EINVAL;
844 			goto errout_free;
845 		}
846 		*pvalid_lft = ci->ifa_valid;
847 		*pprefered_lft = ci->ifa_prefered;
848 	}
849 
850 	return ifa;
851 
852 errout_free:
853 	inet_free_ifa(ifa);
854 errout:
855 	return ERR_PTR(err);
856 }
857 
858 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
859 {
860 	struct in_device *in_dev = ifa->ifa_dev;
861 	struct in_ifaddr *ifa1, **ifap;
862 
863 	if (!ifa->ifa_local)
864 		return NULL;
865 
866 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
867 	     ifap = &ifa1->ifa_next) {
868 		if (ifa1->ifa_mask == ifa->ifa_mask &&
869 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
870 		    ifa1->ifa_local == ifa->ifa_local)
871 			return ifa1;
872 	}
873 	return NULL;
874 }
875 
876 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
877 			    struct netlink_ext_ack *extack)
878 {
879 	struct net *net = sock_net(skb->sk);
880 	struct in_ifaddr *ifa;
881 	struct in_ifaddr *ifa_existing;
882 	__u32 valid_lft = INFINITY_LIFE_TIME;
883 	__u32 prefered_lft = INFINITY_LIFE_TIME;
884 
885 	ASSERT_RTNL();
886 
887 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
888 	if (IS_ERR(ifa))
889 		return PTR_ERR(ifa);
890 
891 	ifa_existing = find_matching_ifa(ifa);
892 	if (!ifa_existing) {
893 		/* It would be best to check for !NLM_F_CREATE here but
894 		 * userspace already relies on not having to provide this.
895 		 */
896 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
897 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
898 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
899 					       true, ifa);
900 
901 			if (ret < 0) {
902 				inet_free_ifa(ifa);
903 				return ret;
904 			}
905 		}
906 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907 					 extack);
908 	} else {
909 		inet_free_ifa(ifa);
910 
911 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
912 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
913 			return -EEXIST;
914 		ifa = ifa_existing;
915 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916 		cancel_delayed_work(&check_lifetime_work);
917 		queue_delayed_work(system_power_efficient_wq,
918 				&check_lifetime_work, 0);
919 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
920 	}
921 	return 0;
922 }
923 
924 /*
925  *	Determine a default network mask, based on the IP address.
926  */
927 
928 static int inet_abc_len(__be32 addr)
929 {
930 	int rc = -1;	/* Something else, probably a multicast. */
931 
932 	if (ipv4_is_zeronet(addr))
933 		rc = 0;
934 	else {
935 		__u32 haddr = ntohl(addr);
936 
937 		if (IN_CLASSA(haddr))
938 			rc = 8;
939 		else if (IN_CLASSB(haddr))
940 			rc = 16;
941 		else if (IN_CLASSC(haddr))
942 			rc = 24;
943 	}
944 
945 	return rc;
946 }
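
/* Worked example (editorial note) of the classful defaults above:
 *   inet_abc_len(htonl(0x0A000001)) ->  8   (10.0.0.1,    class A)
 *   inet_abc_len(htonl(0xAC100001)) -> 16   (172.16.0.1,  class B)
 *   inet_abc_len(htonl(0xC0A80101)) -> 24   (192.168.1.1, class C)
 *   inet_abc_len(htonl(0xE0000001)) -> -1   (224.0.0.1,   multicast)
 *   inet_abc_len(0)                 ->  0   (zeronet)
 */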
947 
948 
949 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
950 {
951 	struct ifreq ifr;
952 	struct sockaddr_in sin_orig;
953 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
954 	struct in_device *in_dev;
955 	struct in_ifaddr **ifap = NULL;
956 	struct in_ifaddr *ifa = NULL;
957 	struct net_device *dev;
958 	char *colon;
959 	int ret = -EFAULT;
960 	int tryaddrmatch = 0;
961 
962 	/*
963 	 *	Fetch the caller's info block into kernel space
964 	 */
965 
966 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
967 		goto out;
968 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
969 
970 	/* save original address for comparison */
971 	memcpy(&sin_orig, sin, sizeof(*sin));
972 
973 	colon = strchr(ifr.ifr_name, ':');
974 	if (colon)
975 		*colon = 0;
976 
977 	dev_load(net, ifr.ifr_name);
978 
979 	switch (cmd) {
980 	case SIOCGIFADDR:	/* Get interface address */
981 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
982 	case SIOCGIFDSTADDR:	/* Get the destination address */
983 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
984 		/* Note that these ioctls will not sleep,
985 		   so we do not impose a lock.
986 		   One day we will be forced to put a shared lock here (I mean SMP)
987 		 */
988 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
989 		memset(sin, 0, sizeof(*sin));
990 		sin->sin_family = AF_INET;
991 		break;
992 
993 	case SIOCSIFFLAGS:
994 		ret = -EPERM;
995 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
996 			goto out;
997 		break;
998 	case SIOCSIFADDR:	/* Set interface address (and family) */
999 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1000 	case SIOCSIFDSTADDR:	/* Set the destination address */
1001 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1002 		ret = -EPERM;
1003 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1004 			goto out;
1005 		ret = -EINVAL;
1006 		if (sin->sin_family != AF_INET)
1007 			goto out;
1008 		break;
1009 	default:
1010 		ret = -EINVAL;
1011 		goto out;
1012 	}
1013 
1014 	rtnl_lock();
1015 
1016 	ret = -ENODEV;
1017 	dev = __dev_get_by_name(net, ifr.ifr_name);
1018 	if (!dev)
1019 		goto done;
1020 
1021 	if (colon)
1022 		*colon = ':';
1023 
1024 	in_dev = __in_dev_get_rtnl(dev);
1025 	if (in_dev) {
1026 		if (tryaddrmatch) {
1027 			/* Matthias Andree */
1028 			/* compare label and address (4.4BSD style) */
1029 			/* note: we only do this for a limited set of ioctls
1030 			   and only if the original address family was AF_INET.
1031 			   This is checked above. */
1032 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1033 			     ifap = &ifa->ifa_next) {
1034 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1035 				    sin_orig.sin_addr.s_addr ==
1036 							ifa->ifa_local) {
1037 					break; /* found */
1038 				}
1039 			}
1040 		}
1041 		/* We didn't get a match; maybe the application is
1042 		   4.3BSD-style and passed in junk, so we fall back to
1043 		   comparing just the label */
1044 		if (!ifa) {
1045 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1046 			     ifap = &ifa->ifa_next)
1047 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1048 					break;
1049 		}
1050 	}
1051 
1052 	ret = -EADDRNOTAVAIL;
1053 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1054 		goto done;
1055 
1056 	switch (cmd) {
1057 	case SIOCGIFADDR:	/* Get interface address */
1058 		sin->sin_addr.s_addr = ifa->ifa_local;
1059 		goto rarok;
1060 
1061 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1062 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1063 		goto rarok;
1064 
1065 	case SIOCGIFDSTADDR:	/* Get the destination address */
1066 		sin->sin_addr.s_addr = ifa->ifa_address;
1067 		goto rarok;
1068 
1069 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1070 		sin->sin_addr.s_addr = ifa->ifa_mask;
1071 		goto rarok;
1072 
1073 	case SIOCSIFFLAGS:
1074 		if (colon) {
1075 			ret = -EADDRNOTAVAIL;
1076 			if (!ifa)
1077 				break;
1078 			ret = 0;
1079 			if (!(ifr.ifr_flags & IFF_UP))
1080 				inet_del_ifa(in_dev, ifap, 1);
1081 			break;
1082 		}
1083 		ret = dev_change_flags(dev, ifr.ifr_flags);
1084 		break;
1085 
1086 	case SIOCSIFADDR:	/* Set interface address (and family) */
1087 		ret = -EINVAL;
1088 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1089 			break;
1090 
1091 		if (!ifa) {
1092 			ret = -ENOBUFS;
1093 			ifa = inet_alloc_ifa();
1094 			if (!ifa)
1095 				break;
1096 			INIT_HLIST_NODE(&ifa->hash);
1097 			if (colon)
1098 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1099 			else
1100 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1101 		} else {
1102 			ret = 0;
1103 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1104 				break;
1105 			inet_del_ifa(in_dev, ifap, 0);
1106 			ifa->ifa_broadcast = 0;
1107 			ifa->ifa_scope = 0;
1108 		}
1109 
1110 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1111 
1112 		if (!(dev->flags & IFF_POINTOPOINT)) {
1113 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1114 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1115 			if ((dev->flags & IFF_BROADCAST) &&
1116 			    ifa->ifa_prefixlen < 31)
1117 				ifa->ifa_broadcast = ifa->ifa_address |
1118 						     ~ifa->ifa_mask;
1119 		} else {
1120 			ifa->ifa_prefixlen = 32;
1121 			ifa->ifa_mask = inet_make_mask(32);
1122 		}
1123 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1124 		ret = inet_set_ifa(dev, ifa);
1125 		break;
1126 
1127 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1128 		ret = 0;
1129 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1130 			inet_del_ifa(in_dev, ifap, 0);
1131 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1132 			inet_insert_ifa(ifa);
1133 		}
1134 		break;
1135 
1136 	case SIOCSIFDSTADDR:	/* Set the destination address */
1137 		ret = 0;
1138 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1139 			break;
1140 		ret = -EINVAL;
1141 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1142 			break;
1143 		ret = 0;
1144 		inet_del_ifa(in_dev, ifap, 0);
1145 		ifa->ifa_address = sin->sin_addr.s_addr;
1146 		inet_insert_ifa(ifa);
1147 		break;
1148 
1149 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1150 
1151 		/*
1152 		 *	The mask we set must be legal.
1153 		 */
1154 		ret = -EINVAL;
1155 		if (bad_mask(sin->sin_addr.s_addr, 0))
1156 			break;
1157 		ret = 0;
1158 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1159 			__be32 old_mask = ifa->ifa_mask;
1160 			inet_del_ifa(in_dev, ifap, 0);
1161 			ifa->ifa_mask = sin->sin_addr.s_addr;
1162 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1163 
1164 			/* If the current broadcast address was derived
1165 			 * from the old netmask, recalculate it for the
1166 			 * new one. Otherwise it's a funny address, so
1167 			 * don't touch it since the user seems to know
1168 			 * what (s)he's doing...
1169 			 */
1170 			if ((dev->flags & IFF_BROADCAST) &&
1171 			    (ifa->ifa_prefixlen < 31) &&
1172 			    (ifa->ifa_broadcast ==
1173 			     (ifa->ifa_local|~old_mask))) {
1174 				ifa->ifa_broadcast = (ifa->ifa_local |
1175 						      ~sin->sin_addr.s_addr);
1176 			}
1177 			inet_insert_ifa(ifa);
1178 		}
1179 		break;
1180 	}
1181 done:
1182 	rtnl_unlock();
1183 out:
1184 	return ret;
1185 rarok:
1186 	rtnl_unlock();
1187 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1188 	goto out;
1189 }
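
#if 0	/* Illustrative userspace sketch (editorial, not part of the original
	 * file): the SIOCGIFADDR branch above serves requests of this form,
	 * issued on an AF_INET socket.
	 */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/in.h>

static int get_ifaddr(int sock, const char *name, struct in_addr *out)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(sock, SIOCGIFADDR, &ifr) < 0)
		return -1;	/* errno is set by the kernel path above */
	*out = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr;
	return 0;
}
#endif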
1190 
1191 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1192 {
1193 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1194 	struct in_ifaddr *ifa;
1195 	struct ifreq ifr;
1196 	int done = 0;
1197 
1198 	if (!in_dev)
1199 		goto out;
1200 
1201 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1202 		if (!buf) {
1203 			done += sizeof(ifr);
1204 			continue;
1205 		}
1206 		if (len < (int) sizeof(ifr))
1207 			break;
1208 		memset(&ifr, 0, sizeof(struct ifreq));
1209 		strcpy(ifr.ifr_name, ifa->ifa_label);
1210 
1211 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1212 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1213 								ifa->ifa_local;
1214 
1215 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1216 			done = -EFAULT;
1217 			break;
1218 		}
1219 		buf  += sizeof(struct ifreq);
1220 		len  -= sizeof(struct ifreq);
1221 		done += sizeof(struct ifreq);
1222 	}
1223 out:
1224 	return done;
1225 }
1226 
1227 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1228 				 int scope)
1229 {
1230 	for_primary_ifa(in_dev) {
1231 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1232 		    ifa->ifa_scope <= scope)
1233 			return ifa->ifa_local;
1234 	} endfor_ifa(in_dev);
1235 
1236 	return 0;
1237 }
1238 
1239 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1240 {
1241 	__be32 addr = 0;
1242 	struct in_device *in_dev;
1243 	struct net *net = dev_net(dev);
1244 	int master_idx;
1245 
1246 	rcu_read_lock();
1247 	in_dev = __in_dev_get_rcu(dev);
1248 	if (!in_dev)
1249 		goto no_in_dev;
1250 
1251 	for_primary_ifa(in_dev) {
1252 		if (ifa->ifa_scope > scope)
1253 			continue;
1254 		if (!dst || inet_ifa_match(dst, ifa)) {
1255 			addr = ifa->ifa_local;
1256 			break;
1257 		}
1258 		if (!addr)
1259 			addr = ifa->ifa_local;
1260 	} endfor_ifa(in_dev);
1261 
1262 	if (addr)
1263 		goto out_unlock;
1264 no_in_dev:
1265 	master_idx = l3mdev_master_ifindex_rcu(dev);
1266 
1267 	/* For VRFs, the VRF device takes the place of the loopback device,
1268 	 * with addresses on it being preferred.  Note in such cases the
1269 	 * loopback device will be among the devices that fail the master_idx
1270 	 * equality check in the loop below.
1271 	 */
1272 	if (master_idx &&
1273 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1274 	    (in_dev = __in_dev_get_rcu(dev))) {
1275 		addr = in_dev_select_addr(in_dev, scope);
1276 		if (addr)
1277 			goto out_unlock;
1278 	}
1279 
1280 	/* Non-loopback addresses on the loopback device should be preferred
1281 	   in this case. It is important that lo is the first interface
1282 	   in the dev_base list.
1283 	 */
1284 	for_each_netdev_rcu(net, dev) {
1285 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1286 			continue;
1287 
1288 		in_dev = __in_dev_get_rcu(dev);
1289 		if (!in_dev)
1290 			continue;
1291 
1292 		addr = in_dev_select_addr(in_dev, scope);
1293 		if (addr)
1294 			goto out_unlock;
1295 	}
1296 out_unlock:
1297 	rcu_read_unlock();
1298 	return addr;
1299 }
1300 EXPORT_SYMBOL(inet_select_addr);
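
/* Illustrative sketch (editorial, not part of the original file): picking
 * a source address for replies sent out of a given device.  The wrapper
 * name is hypothetical; only inet_select_addr() is the real API.
 */
static __be32 example_pick_saddr(const struct net_device *dev, __be32 dst)
{
	/* Prefer an address on the same subnet as dst; returns 0 when
	 * nothing suitable is configured up to universe scope.
	 */
	return inet_select_addr(dev, dst, RT_SCOPE_UNIVERSE);
}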
1301 
1302 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1303 			      __be32 local, int scope)
1304 {
1305 	int same = 0;
1306 	__be32 addr = 0;
1307 
1308 	for_ifa(in_dev) {
1309 		if (!addr &&
1310 		    (local == ifa->ifa_local || !local) &&
1311 		    ifa->ifa_scope <= scope) {
1312 			addr = ifa->ifa_local;
1313 			if (same)
1314 				break;
1315 		}
1316 		if (!same) {
1317 			same = (!local || inet_ifa_match(local, ifa)) &&
1318 				(!dst || inet_ifa_match(dst, ifa));
1319 			if (same && addr) {
1320 				if (local || !dst)
1321 					break;
1322 				/* Is the selected addr in the dst subnet? */
1323 				if (inet_ifa_match(addr, ifa))
1324 					break;
1325 				/* No, then can we use new local src? */
1326 				if (ifa->ifa_scope <= scope) {
1327 					addr = ifa->ifa_local;
1328 					break;
1329 				}
1330 				/* search for large dst subnet for addr */
1331 				same = 0;
1332 			}
1333 		}
1334 	} endfor_ifa(in_dev);
1335 
1336 	return same ? addr : 0;
1337 }
1338 
1339 /*
1340  * Confirm that local IP address exists using wildcards:
1341  * - net: netns to check, cannot be NULL
1342  * - in_dev: only on this interface, NULL=any interface
1343  * - dst: only in the same subnet as dst, 0=any dst
1344  * - local: address, 0=autoselect the local address
1345  * - scope: maximum allowed scope value for the local address
1346  */
1347 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1348 			 __be32 dst, __be32 local, int scope)
1349 {
1350 	__be32 addr = 0;
1351 	struct net_device *dev;
1352 
1353 	if (in_dev)
1354 		return confirm_addr_indev(in_dev, dst, local, scope);
1355 
1356 	rcu_read_lock();
1357 	for_each_netdev_rcu(net, dev) {
1358 		in_dev = __in_dev_get_rcu(dev);
1359 		if (in_dev) {
1360 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1361 			if (addr)
1362 				break;
1363 		}
1364 	}
1365 	rcu_read_unlock();
1366 
1367 	return addr;
1368 }
1369 EXPORT_SYMBOL(inet_confirm_addr);
1370 
1371 /*
1372  *	Device notifier
1373  */
1374 
1375 int register_inetaddr_notifier(struct notifier_block *nb)
1376 {
1377 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1378 }
1379 EXPORT_SYMBOL(register_inetaddr_notifier);
1380 
1381 int unregister_inetaddr_notifier(struct notifier_block *nb)
1382 {
1383 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1384 }
1385 EXPORT_SYMBOL(unregister_inetaddr_notifier);
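
/* Illustrative sketch (editorial, not part of the original file): a minimal
 * notifier logging IPv4 address changes.  The callback receives the
 * affected in_ifaddr; NETDEV_UP/NETDEV_DOWN are the events raised by the
 * code above.
 */
static int example_inetaddr_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = ptr;

	if (event == NETDEV_UP || event == NETDEV_DOWN)
		pr_info("%pI4 %s on %s\n", &ifa->ifa_local,
			event == NETDEV_UP ? "added" : "removed",
			ifa->ifa_dev->dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block example_inetaddr_nb = {
	.notifier_call = example_inetaddr_event,
};
/* register_inetaddr_notifier(&example_inetaddr_nb) on init,
 * unregister_inetaddr_notifier(&example_inetaddr_nb) on exit.
 */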
1386 
1387 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1388 {
1389 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1390 }
1391 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1392 
1393 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1394 {
1395 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1396 	    nb);
1397 }
1398 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1399 
1400 /* Rename ifa_labels for a device name change. Make some effort to preserve
1401  * existing alias numbering and to create unique labels if possible.
1402 */
1403 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1404 {
1405 	struct in_ifaddr *ifa;
1406 	int named = 0;
1407 
1408 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1409 		char old[IFNAMSIZ], *dot;
1410 
1411 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1412 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1413 		if (named++ == 0)
1414 			goto skip;
1415 		dot = strchr(old, ':');
1416 		if (!dot) {
1417 			sprintf(old, ":%d", named);
1418 			dot = old;
1419 		}
1420 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1421 			strcat(ifa->ifa_label, dot);
1422 		else
1423 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1424 skip:
1425 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1426 	}
1427 }
1428 
1429 static bool inetdev_valid_mtu(unsigned int mtu)
1430 {
1431 	return mtu >= 68;
1432 }
1433 
1434 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1435 					struct in_device *in_dev)
1436 
1437 {
1438 	struct in_ifaddr *ifa;
1439 
1440 	for (ifa = in_dev->ifa_list; ifa;
1441 	     ifa = ifa->ifa_next) {
1442 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1443 			 ifa->ifa_local, dev,
1444 			 ifa->ifa_local, NULL,
1445 			 dev->dev_addr, NULL);
1446 	}
1447 }
1448 
1449 /* Called only under RTNL semaphore */
1450 
1451 static int inetdev_event(struct notifier_block *this, unsigned long event,
1452 			 void *ptr)
1453 {
1454 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1455 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1456 
1457 	ASSERT_RTNL();
1458 
1459 	if (!in_dev) {
1460 		if (event == NETDEV_REGISTER) {
1461 			in_dev = inetdev_init(dev);
1462 			if (IS_ERR(in_dev))
1463 				return notifier_from_errno(PTR_ERR(in_dev));
1464 			if (dev->flags & IFF_LOOPBACK) {
1465 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1466 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1467 			}
1468 		} else if (event == NETDEV_CHANGEMTU) {
1469 			/* Re-enabling IP */
1470 			if (inetdev_valid_mtu(dev->mtu))
1471 				in_dev = inetdev_init(dev);
1472 		}
1473 		goto out;
1474 	}
1475 
1476 	switch (event) {
1477 	case NETDEV_REGISTER:
1478 		pr_debug("%s: bug\n", __func__);
1479 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1480 		break;
1481 	case NETDEV_UP:
1482 		if (!inetdev_valid_mtu(dev->mtu))
1483 			break;
1484 		if (dev->flags & IFF_LOOPBACK) {
1485 			struct in_ifaddr *ifa = inet_alloc_ifa();
1486 
1487 			if (ifa) {
1488 				INIT_HLIST_NODE(&ifa->hash);
1489 				ifa->ifa_local =
1490 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1491 				ifa->ifa_prefixlen = 8;
1492 				ifa->ifa_mask = inet_make_mask(8);
1493 				in_dev_hold(in_dev);
1494 				ifa->ifa_dev = in_dev;
1495 				ifa->ifa_scope = RT_SCOPE_HOST;
1496 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1497 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1498 						 INFINITY_LIFE_TIME);
1499 				ipv4_devconf_setall(in_dev);
1500 				neigh_parms_data_state_setall(in_dev->arp_parms);
1501 				inet_insert_ifa(ifa);
1502 			}
1503 		}
1504 		ip_mc_up(in_dev);
1505 		/* fall through */
1506 	case NETDEV_CHANGEADDR:
1507 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1508 			break;
1509 		/* fall through */
1510 	case NETDEV_NOTIFY_PEERS:
1511 		/* Send gratuitous ARP to notify of link change */
1512 		inetdev_send_gratuitous_arp(dev, in_dev);
1513 		break;
1514 	case NETDEV_DOWN:
1515 		ip_mc_down(in_dev);
1516 		break;
1517 	case NETDEV_PRE_TYPE_CHANGE:
1518 		ip_mc_unmap(in_dev);
1519 		break;
1520 	case NETDEV_POST_TYPE_CHANGE:
1521 		ip_mc_remap(in_dev);
1522 		break;
1523 	case NETDEV_CHANGEMTU:
1524 		if (inetdev_valid_mtu(dev->mtu))
1525 			break;
1526 		/* disable IP when the MTU is too small */
1527 		/* fall through */
1528 	case NETDEV_UNREGISTER:
1529 		inetdev_destroy(in_dev);
1530 		break;
1531 	case NETDEV_CHANGENAME:
1532 		/* Do not notify about the label change; this event is
1533 		 * not interesting to applications using netlink.
1534 		 */
1535 		inetdev_changename(dev, in_dev);
1536 
1537 		devinet_sysctl_unregister(in_dev);
1538 		devinet_sysctl_register(in_dev);
1539 		break;
1540 	}
1541 out:
1542 	return NOTIFY_DONE;
1543 }
1544 
1545 static struct notifier_block ip_netdev_notifier = {
1546 	.notifier_call = inetdev_event,
1547 };
1548 
1549 static size_t inet_nlmsg_size(void)
1550 {
1551 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1552 	       + nla_total_size(4) /* IFA_ADDRESS */
1553 	       + nla_total_size(4) /* IFA_LOCAL */
1554 	       + nla_total_size(4) /* IFA_BROADCAST */
1555 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1556 	       + nla_total_size(4)  /* IFA_FLAGS */
1557 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1558 }
1559 
1560 static inline u32 cstamp_delta(unsigned long cstamp)
1561 {
1562 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1563 }
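
/* Worked example (editorial note): with HZ == 1000, a timestamp taken
 * 2500 jiffies after INITIAL_JIFFIES yields cstamp_delta() == 250, i.e.
 * the ifa_cacheinfo timestamps below are reported in hundredths of a
 * second since boot.
 */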
1564 
1565 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1566 			 unsigned long tstamp, u32 preferred, u32 valid)
1567 {
1568 	struct ifa_cacheinfo ci;
1569 
1570 	ci.cstamp = cstamp_delta(cstamp);
1571 	ci.tstamp = cstamp_delta(tstamp);
1572 	ci.ifa_prefered = preferred;
1573 	ci.ifa_valid = valid;
1574 
1575 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1576 }
1577 
1578 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1579 			    u32 portid, u32 seq, int event, unsigned int flags)
1580 {
1581 	struct ifaddrmsg *ifm;
1582 	struct nlmsghdr  *nlh;
1583 	u32 preferred, valid;
1584 
1585 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1586 	if (!nlh)
1587 		return -EMSGSIZE;
1588 
1589 	ifm = nlmsg_data(nlh);
1590 	ifm->ifa_family = AF_INET;
1591 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1592 	ifm->ifa_flags = ifa->ifa_flags;
1593 	ifm->ifa_scope = ifa->ifa_scope;
1594 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1595 
1596 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1597 		preferred = ifa->ifa_preferred_lft;
1598 		valid = ifa->ifa_valid_lft;
1599 		if (preferred != INFINITY_LIFE_TIME) {
1600 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1601 
1602 			if (preferred > tval)
1603 				preferred -= tval;
1604 			else
1605 				preferred = 0;
1606 			if (valid != INFINITY_LIFE_TIME) {
1607 				if (valid > tval)
1608 					valid -= tval;
1609 				else
1610 					valid = 0;
1611 			}
1612 		}
1613 	} else {
1614 		preferred = INFINITY_LIFE_TIME;
1615 		valid = INFINITY_LIFE_TIME;
1616 	}
1617 	if ((ifa->ifa_address &&
1618 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1619 	    (ifa->ifa_local &&
1620 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1621 	    (ifa->ifa_broadcast &&
1622 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1623 	    (ifa->ifa_label[0] &&
1624 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1625 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1626 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1627 			  preferred, valid))
1628 		goto nla_put_failure;
1629 
1630 	nlmsg_end(skb, nlh);
1631 	return 0;
1632 
1633 nla_put_failure:
1634 	nlmsg_cancel(skb, nlh);
1635 	return -EMSGSIZE;
1636 }
1637 
1638 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1639 {
1640 	struct net *net = sock_net(skb->sk);
1641 	int h, s_h;
1642 	int idx, s_idx;
1643 	int ip_idx, s_ip_idx;
1644 	struct net_device *dev;
1645 	struct in_device *in_dev;
1646 	struct in_ifaddr *ifa;
1647 	struct hlist_head *head;
1648 
1649 	s_h = cb->args[0];
1650 	s_idx = idx = cb->args[1];
1651 	s_ip_idx = ip_idx = cb->args[2];
1652 
1653 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1654 		idx = 0;
1655 		head = &net->dev_index_head[h];
1656 		rcu_read_lock();
1657 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1658 			  net->dev_base_seq;
1659 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1660 			if (idx < s_idx)
1661 				goto cont;
1662 			if (h > s_h || idx > s_idx)
1663 				s_ip_idx = 0;
1664 			in_dev = __in_dev_get_rcu(dev);
1665 			if (!in_dev)
1666 				goto cont;
1667 
1668 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1669 			     ifa = ifa->ifa_next, ip_idx++) {
1670 				if (ip_idx < s_ip_idx)
1671 					continue;
1672 				if (inet_fill_ifaddr(skb, ifa,
1673 					     NETLINK_CB(cb->skb).portid,
1674 					     cb->nlh->nlmsg_seq,
1675 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1676 					rcu_read_unlock();
1677 					goto done;
1678 				}
1679 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1680 			}
1681 cont:
1682 			idx++;
1683 		}
1684 		rcu_read_unlock();
1685 	}
1686 
1687 done:
1688 	cb->args[0] = h;
1689 	cb->args[1] = idx;
1690 	cb->args[2] = ip_idx;
1691 
1692 	return skb->len;
1693 }
1694 
1695 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1696 		      u32 portid)
1697 {
1698 	struct sk_buff *skb;
1699 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1700 	int err = -ENOBUFS;
1701 	struct net *net;
1702 
1703 	net = dev_net(ifa->ifa_dev->dev);
1704 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1705 	if (!skb)
1706 		goto errout;
1707 
1708 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1709 	if (err < 0) {
1710 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1711 		WARN_ON(err == -EMSGSIZE);
1712 		kfree_skb(skb);
1713 		goto errout;
1714 	}
1715 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1716 	return;
1717 errout:
1718 	if (err < 0)
1719 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1720 }
1721 
1722 static size_t inet_get_link_af_size(const struct net_device *dev,
1723 				    u32 ext_filter_mask)
1724 {
1725 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1726 
1727 	if (!in_dev)
1728 		return 0;
1729 
1730 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1731 }
1732 
1733 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1734 			     u32 ext_filter_mask)
1735 {
1736 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1737 	struct nlattr *nla;
1738 	int i;
1739 
1740 	if (!in_dev)
1741 		return -ENODATA;
1742 
1743 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1744 	if (!nla)
1745 		return -EMSGSIZE;
1746 
1747 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1748 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1749 
1750 	return 0;
1751 }
1752 
1753 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1754 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1755 };
1756 
1757 static int inet_validate_link_af(const struct net_device *dev,
1758 				 const struct nlattr *nla)
1759 {
1760 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1761 	int err, rem;
1762 
1763 	if (dev && !__in_dev_get_rcu(dev))
1764 		return -EAFNOSUPPORT;
1765 
1766 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1767 	if (err < 0)
1768 		return err;
1769 
1770 	if (tb[IFLA_INET_CONF]) {
1771 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1772 			int cfgid = nla_type(a);
1773 
1774 			if (nla_len(a) < 4)
1775 				return -EINVAL;
1776 
1777 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1778 				return -EINVAL;
1779 		}
1780 	}
1781 
1782 	return 0;
1783 }
1784 
1785 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1786 {
1787 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1788 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1789 	int rem;
1790 
1791 	if (!in_dev)
1792 		return -EAFNOSUPPORT;
1793 
1794 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1795 		BUG();
1796 
1797 	if (tb[IFLA_INET_CONF]) {
1798 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1799 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1800 	}
1801 
1802 	return 0;
1803 }
1804 
1805 static int inet_netconf_msgsize_devconf(int type)
1806 {
1807 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1808 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1809 	bool all = false;
1810 
1811 	if (type == NETCONFA_ALL)
1812 		all = true;
1813 
1814 	if (all || type == NETCONFA_FORWARDING)
1815 		size += nla_total_size(4);
1816 	if (all || type == NETCONFA_RP_FILTER)
1817 		size += nla_total_size(4);
1818 	if (all || type == NETCONFA_MC_FORWARDING)
1819 		size += nla_total_size(4);
1820 	if (all || type == NETCONFA_PROXY_NEIGH)
1821 		size += nla_total_size(4);
1822 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1823 		size += nla_total_size(4);
1824 
1825 	return size;
1826 }
1827 
1828 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1829 				     struct ipv4_devconf *devconf, u32 portid,
1830 				     u32 seq, int event, unsigned int flags,
1831 				     int type)
1832 {
1833 	struct nlmsghdr  *nlh;
1834 	struct netconfmsg *ncm;
1835 	bool all = false;
1836 
1837 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1838 			flags);
1839 	if (!nlh)
1840 		return -EMSGSIZE;
1841 
1842 	if (type == NETCONFA_ALL)
1843 		all = true;
1844 
1845 	ncm = nlmsg_data(nlh);
1846 	ncm->ncm_family = AF_INET;
1847 
1848 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1849 		goto nla_put_failure;
1850 
1851 	if (!devconf)
1852 		goto out;
1853 
1854 	if ((all || type == NETCONFA_FORWARDING) &&
1855 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1856 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1857 		goto nla_put_failure;
1858 	if ((all || type == NETCONFA_RP_FILTER) &&
1859 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1860 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1861 		goto nla_put_failure;
1862 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1863 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1864 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1865 		goto nla_put_failure;
1866 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1867 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1868 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1869 		goto nla_put_failure;
1870 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1871 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1872 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1873 		goto nla_put_failure;
1874 
1875 out:
1876 	nlmsg_end(skb, nlh);
1877 	return 0;
1878 
1879 nla_put_failure:
1880 	nlmsg_cancel(skb, nlh);
1881 	return -EMSGSIZE;
1882 }
1883 
1884 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1885 				 int ifindex, struct ipv4_devconf *devconf)
1886 {
1887 	struct sk_buff *skb;
1888 	int err = -ENOBUFS;
1889 
1890 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1891 	if (!skb)
1892 		goto errout;
1893 
1894 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1895 					event, 0, type);
1896 	if (err < 0) {
1897 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1898 		WARN_ON(err == -EMSGSIZE);
1899 		kfree_skb(skb);
1900 		goto errout;
1901 	}
1902 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1903 	return;
1904 errout:
1905 	if (err < 0)
1906 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1907 }
1908 
1909 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1910 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1911 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1912 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1913 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1914 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1915 };
1916 
1917 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1918 				    struct nlmsghdr *nlh,
1919 				    struct netlink_ext_ack *extack)
1920 {
1921 	struct net *net = sock_net(in_skb->sk);
1922 	struct nlattr *tb[NETCONFA_MAX+1];
1923 	struct netconfmsg *ncm;
1924 	struct sk_buff *skb;
1925 	struct ipv4_devconf *devconf;
1926 	struct in_device *in_dev;
1927 	struct net_device *dev;
1928 	int ifindex;
1929 	int err;
1930 
1931 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1932 			  devconf_ipv4_policy, extack);
1933 	if (err < 0)
1934 		goto errout;
1935 
1936 	err = -EINVAL;
1937 	if (!tb[NETCONFA_IFINDEX])
1938 		goto errout;
1939 
1940 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1941 	switch (ifindex) {
1942 	case NETCONFA_IFINDEX_ALL:
1943 		devconf = net->ipv4.devconf_all;
1944 		break;
1945 	case NETCONFA_IFINDEX_DEFAULT:
1946 		devconf = net->ipv4.devconf_dflt;
1947 		break;
1948 	default:
1949 		dev = __dev_get_by_index(net, ifindex);
1950 		if (!dev)
1951 			goto errout;
1952 		in_dev = __in_dev_get_rtnl(dev);
1953 		if (!in_dev)
1954 			goto errout;
1955 		devconf = &in_dev->cnf;
1956 		break;
1957 	}
1958 
1959 	err = -ENOBUFS;
1960 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1961 	if (!skb)
1962 		goto errout;
1963 
1964 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1965 					NETLINK_CB(in_skb).portid,
1966 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1967 					NETCONFA_ALL);
1968 	if (err < 0) {
1969 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1970 		WARN_ON(err == -EMSGSIZE);
1971 		kfree_skb(skb);
1972 		goto errout;
1973 	}
1974 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1975 errout:
1976 	return err;
1977 }
1978 
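/*
 * RTM_GETNETCONF dump handler.  Walks the per-namespace dev_index_head
 * hash and emits one RTM_NEWNETCONF per in_device, then treats bucket
 * NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 as pseudo-buckets for
 * the "all" and "default" configurations.  cb->args[0]/[1] record the
 * bucket and index so an interrupted dump resumes where it stopped.
 */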
1979 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1980 				     struct netlink_callback *cb)
1981 {
1982 	struct net *net = sock_net(skb->sk);
1983 	int h, s_h;
1984 	int idx, s_idx;
1985 	struct net_device *dev;
1986 	struct in_device *in_dev;
1987 	struct hlist_head *head;
1988 
1989 	s_h = cb->args[0];
1990 	s_idx = idx = cb->args[1];
1991 
1992 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1993 		idx = 0;
1994 		head = &net->dev_index_head[h];
1995 		rcu_read_lock();
1996 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1997 			  net->dev_base_seq;
1998 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1999 			if (idx < s_idx)
2000 				goto cont;
2001 			in_dev = __in_dev_get_rcu(dev);
2002 			if (!in_dev)
2003 				goto cont;
2004 
2005 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2006 						      &in_dev->cnf,
2007 						      NETLINK_CB(cb->skb).portid,
2008 						      cb->nlh->nlmsg_seq,
2009 						      RTM_NEWNETCONF,
2010 						      NLM_F_MULTI,
2011 						      NETCONFA_ALL) < 0) {
2012 				rcu_read_unlock();
2013 				goto done;
2014 			}
2015 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2016 cont:
2017 			idx++;
2018 		}
2019 		rcu_read_unlock();
2020 	}
2021 	if (h == NETDEV_HASHENTRIES) {
2022 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2023 					      net->ipv4.devconf_all,
2024 					      NETLINK_CB(cb->skb).portid,
2025 					      cb->nlh->nlmsg_seq,
2026 					      RTM_NEWNETCONF, NLM_F_MULTI,
2027 					      NETCONFA_ALL) < 0)
2028 			goto done;
2029 		else
2030 			h++;
2031 	}
2032 	if (h == NETDEV_HASHENTRIES + 1) {
2033 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2034 					      net->ipv4.devconf_dflt,
2035 					      NETLINK_CB(cb->skb).portid,
2036 					      cb->nlh->nlmsg_seq,
2037 					      RTM_NEWNETCONF, NLM_F_MULTI,
2038 					      NETCONFA_ALL) < 0)
2039 			goto done;
2040 		else
2041 			h++;
2042 	}
2043 done:
2044 	cb->args[0] = h;
2045 	cb->args[1] = idx;
2046 
2047 	return skb->len;
2048 }
2049 
2050 #ifdef CONFIG_SYSCTL
2051 
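/*
 * A write to the "default" template propagates to every in_device that
 * has not overridden entry i itself (tracked by the cnf.state bitmap).
 */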
2052 static void devinet_copy_dflt_conf(struct net *net, int i)
2053 {
2054 	struct net_device *dev;
2055 
2056 	rcu_read_lock();
2057 	for_each_netdev_rcu(net, dev) {
2058 		struct in_device *in_dev;
2059 
2060 		in_dev = __in_dev_get_rcu(dev);
2061 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2062 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2063 	}
2064 	rcu_read_unlock();
2065 }
2066 
2067 /* called with RTNL locked */
2068 static void inet_forward_change(struct net *net)
2069 {
2070 	struct net_device *dev;
2071 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2072 
2073 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2074 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2075 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2076 				    NETCONFA_FORWARDING,
2077 				    NETCONFA_IFINDEX_ALL,
2078 				    net->ipv4.devconf_all);
2079 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2080 				    NETCONFA_FORWARDING,
2081 				    NETCONFA_IFINDEX_DEFAULT,
2082 				    net->ipv4.devconf_dflt);
2083 
2084 	for_each_netdev(net, dev) {
2085 		struct in_device *in_dev;
2086 
2087 		if (on)
2088 			dev_disable_lro(dev);
2089 
2090 		in_dev = __in_dev_get_rtnl(dev);
2091 		if (in_dev) {
2092 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2093 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2094 						    NETCONFA_FORWARDING,
2095 						    dev->ifindex, &in_dev->cnf);
2096 		}
2097 	}
2098 }
2099 
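/*
 * Map a devconf block back to the ifindex used in netconf notifications:
 * the DEFAULT/ALL pseudo-indexes for the templates, otherwise the index
 * of the device embedding it.
 */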
2100 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2101 {
2102 	if (cnf == net->ipv4.devconf_dflt)
2103 		return NETCONFA_IFINDEX_DEFAULT;
2104 	else if (cnf == net->ipv4.devconf_all)
2105 		return NETCONFA_IFINDEX_ALL;
2106 	else {
2107 		struct in_device *idev
2108 			= container_of(cnf, struct in_device, cnf);
2109 		return idev->dev->ifindex;
2110 	}
2111 }
2112 
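/*
 * proc handler shared by most per-device entries.  The entry index is
 * recovered from the offset of ctl->data inside cnf->data; on a write it
 * is marked as explicitly set, writes to "default" are copied to devices
 * still using the default, disabling accept_local/route_localnet flushes
 * the route cache, and changes to rp_filter, proxy_arp and
 * ignore_routes_with_linkdown raise netconf notifications.
 */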
2113 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2114 			     void __user *buffer,
2115 			     size_t *lenp, loff_t *ppos)
2116 {
2117 	int old_value = *(int *)ctl->data;
2118 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2119 	int new_value = *(int *)ctl->data;
2120 
2121 	if (write) {
2122 		struct ipv4_devconf *cnf = ctl->extra1;
2123 		struct net *net = ctl->extra2;
2124 		int i = (int *)ctl->data - cnf->data;
2125 		int ifindex;
2126 
2127 		set_bit(i, cnf->state);
2128 
2129 		if (cnf == net->ipv4.devconf_dflt)
2130 			devinet_copy_dflt_conf(net, i);
2131 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2132 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2133 			if ((new_value == 0) && (old_value != 0))
2134 				rt_cache_flush(net);
2135 
2136 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2137 		    new_value != old_value) {
2138 			ifindex = devinet_conf_ifindex(net, cnf);
2139 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2140 						    NETCONFA_RP_FILTER,
2141 						    ifindex, cnf);
2142 		}
2143 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2144 		    new_value != old_value) {
2145 			ifindex = devinet_conf_ifindex(net, cnf);
2146 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2147 						    NETCONFA_PROXY_NEIGH,
2148 						    ifindex, cnf);
2149 		}
2150 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2151 		    new_value != old_value) {
2152 			ifindex = devinet_conf_ifindex(net, cnf);
2153 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2154 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2155 						    ifindex, cnf);
2156 		}
2157 	}
2158 
2159 	return ret;
2160 }
2161 
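/*
 * Handler for the forwarding entries.  Everything except the "default"
 * template needs the RTNL lock; rtnl_trylock() plus restart_syscall()
 * backs off instead of blocking, restoring the written value and file
 * position first so the retried write sees the original state.  Toggling
 * "all" goes through inet_forward_change(); a single device just updates
 * its own flag, disables LRO and notifies netconf.
 */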
2162 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2163 				  void __user *buffer,
2164 				  size_t *lenp, loff_t *ppos)
2165 {
2166 	int *valp = ctl->data;
2167 	int val = *valp;
2168 	loff_t pos = *ppos;
2169 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2170 
2171 	if (write && *valp != val) {
2172 		struct net *net = ctl->extra2;
2173 
2174 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2175 			if (!rtnl_trylock()) {
2176 				/* Restore the original values before restarting */
2177 				*valp = val;
2178 				*ppos = pos;
2179 				return restart_syscall();
2180 			}
2181 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2182 				inet_forward_change(net);
2183 			} else {
2184 				struct ipv4_devconf *cnf = ctl->extra1;
2185 				struct in_device *idev =
2186 					container_of(cnf, struct in_device, cnf);
2187 				if (*valp)
2188 					dev_disable_lro(idev->dev);
2189 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2190 							    NETCONFA_FORWARDING,
2191 							    idev->dev->ifindex,
2192 							    cnf);
2193 			}
2194 			rtnl_unlock();
2195 			rt_cache_flush(net);
2196 		} else
2197 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2198 						    NETCONFA_FORWARDING,
2199 						    NETCONFA_IFINDEX_DEFAULT,
2200 						    net->ipv4.devconf_dflt);
2201 	}
2202 
2203 	return ret;
2204 }
2205 
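/* Plain integer entry whose change invalidates the IPv4 routing cache. */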
2206 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2207 				void __user *buffer,
2208 				size_t *lenp, loff_t *ppos)
2209 {
2210 	int *valp = ctl->data;
2211 	int val = *valp;
2212 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2213 	struct net *net = ctl->extra2;
2214 
2215 	if (write && *valp != val)
2216 		rt_cache_flush(net);
2217 
2218 	return ret;
2219 }
2220 
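/*
 * Template sysctl table.  Every entry initially points into the static
 * ipv4_devconf; __devinet_sysctl_register() duplicates the table and
 * rebases .data/.extra1/.extra2 for each namespace and device.
 */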
2221 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2222 	{ \
2223 		.procname	= name, \
2224 		.data		= ipv4_devconf.data + \
2225 				  IPV4_DEVCONF_ ## attr - 1, \
2226 		.maxlen		= sizeof(int), \
2227 		.mode		= mval, \
2228 		.proc_handler	= proc, \
2229 		.extra1		= &ipv4_devconf, \
2230 	}
2231 
2232 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2233 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2234 
2235 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2236 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2237 
2238 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2239 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2240 
2241 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2242 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2243 
2244 static struct devinet_sysctl_table {
2245 	struct ctl_table_header *sysctl_header;
2246 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2247 } devinet_sysctl = {
2248 	.devinet_vars = {
2249 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2250 					     devinet_sysctl_forward),
2251 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2252 
2253 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2254 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2255 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2256 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2257 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2258 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2259 					"accept_source_route"),
2260 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2261 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2262 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2263 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2264 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2265 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2266 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2267 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2268 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2269 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2270 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2271 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2272 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2273 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2274 					"force_igmp_version"),
2275 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2276 					"igmpv2_unsolicited_report_interval"),
2277 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2278 					"igmpv3_unsolicited_report_interval"),
2279 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2280 					"ignore_routes_with_linkdown"),
2281 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2282 					"drop_gratuitous_arp"),
2283 
2284 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2285 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2286 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2287 					      "promote_secondaries"),
2288 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2289 					      "route_localnet"),
2290 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2291 					      "drop_unicast_in_l2_multicast"),
2292 	},
2293 };
2294 
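/*
 * Register net/ipv4/conf/<dev_name>/* for one devconf block: duplicate
 * the template, rebase every entry (the final slot stays as the empty
 * terminator) onto @p and @net, register it, and announce the complete
 * configuration with a NETCONFA_ALL netconf notification.
 */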
2295 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2296 				     int ifindex, struct ipv4_devconf *p)
2297 {
2298 	int i;
2299 	struct devinet_sysctl_table *t;
2300 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2301 
2302 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2303 	if (!t)
2304 		goto out;
2305 
2306 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2307 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2308 		t->devinet_vars[i].extra1 = p;
2309 		t->devinet_vars[i].extra2 = net;
2310 	}
2311 
2312 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2313 
2314 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2315 	if (!t->sysctl_header)
2316 		goto free;
2317 
2318 	p->sysctl = t;
2319 
2320 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2321 				    ifindex, p);
2322 	return 0;
2323 
2324 free:
2325 	kfree(t);
2326 out:
2327 	return -ENOBUFS;
2328 }
2329 
2330 static void __devinet_sysctl_unregister(struct net *net,
2331 					struct ipv4_devconf *cnf, int ifindex)
2332 {
2333 	struct devinet_sysctl_table *t = cnf->sysctl;
2334 
2335 	if (t) {
2336 		cnf->sysctl = NULL;
2337 		unregister_net_sysctl_table(t->sysctl_header);
2338 		kfree(t);
2339 	}
2340 
2341 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2342 }
2343 
2344 static int devinet_sysctl_register(struct in_device *idev)
2345 {
2346 	int err;
2347 
2348 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2349 		return -EINVAL;
2350 
2351 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2352 	if (err)
2353 		return err;
2354 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2355 					idev->dev->ifindex, &idev->cnf);
2356 	if (err)
2357 		neigh_sysctl_unregister(idev->arp_parms);
2358 	return err;
2359 }
2360 
2361 static void devinet_sysctl_unregister(struct in_device *idev)
2362 {
2363 	struct net *net = dev_net(idev->dev);
2364 
2365 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2366 	neigh_sysctl_unregister(idev->arp_parms);
2367 }
2368 
2369 static struct ctl_table ctl_forward_entry[] = {
2370 	{
2371 		.procname	= "ip_forward",
2372 		.data		= &ipv4_devconf.data[
2373 					IPV4_DEVCONF_FORWARDING - 1],
2374 		.maxlen		= sizeof(int),
2375 		.mode		= 0644,
2376 		.proc_handler	= devinet_sysctl_forward,
2377 		.extra1		= &ipv4_devconf,
2378 		.extra2		= &init_net,
2379 	},
2380 	{ },
2381 };
2382 #endif
2383 
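/*
 * Per-namespace setup.  init_net keeps using the static ipv4_devconf /
 * ipv4_devconf_dflt templates and ctl_forward_entry; every other
 * namespace gets kmemdup()'d copies so its sysctls are independent.
 */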
2384 static __net_init int devinet_init_net(struct net *net)
2385 {
2386 	int err;
2387 	struct ipv4_devconf *all, *dflt;
2388 #ifdef CONFIG_SYSCTL
2389 	struct ctl_table *tbl = ctl_forward_entry;
2390 	struct ctl_table_header *forw_hdr;
2391 #endif
2392 
2393 	err = -ENOMEM;
2394 	all = &ipv4_devconf;
2395 	dflt = &ipv4_devconf_dflt;
2396 
2397 	if (!net_eq(net, &init_net)) {
2398 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2399 		if (!all)
2400 			goto err_alloc_all;
2401 
2402 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2403 		if (!dflt)
2404 			goto err_alloc_dflt;
2405 
2406 #ifdef CONFIG_SYSCTL
2407 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2408 		if (!tbl)
2409 			goto err_alloc_ctl;
2410 
2411 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2412 		tbl[0].extra1 = all;
2413 		tbl[0].extra2 = net;
2414 #endif
2415 	}
2416 
2417 #ifdef CONFIG_SYSCTL
2418 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2419 	if (err < 0)
2420 		goto err_reg_all;
2421 
2422 	err = __devinet_sysctl_register(net, "default",
2423 					NETCONFA_IFINDEX_DEFAULT, dflt);
2424 	if (err < 0)
2425 		goto err_reg_dflt;
2426 
2427 	err = -ENOMEM;
2428 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2429 	if (!forw_hdr)
2430 		goto err_reg_ctl;
2431 	net->ipv4.forw_hdr = forw_hdr;
2432 #endif
2433 
2434 	net->ipv4.devconf_all = all;
2435 	net->ipv4.devconf_dflt = dflt;
2436 	return 0;
2437 
2438 #ifdef CONFIG_SYSCTL
2439 err_reg_ctl:
2440 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2441 err_reg_dflt:
2442 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2443 err_reg_all:
2444 	if (tbl != ctl_forward_entry)
2445 		kfree(tbl);
2446 err_alloc_ctl:
2447 #endif
2448 	if (dflt != &ipv4_devconf_dflt)
2449 		kfree(dflt);
2450 err_alloc_dflt:
2451 	if (all != &ipv4_devconf)
2452 		kfree(all);
2453 err_alloc_all:
2454 	return err;
2455 }
2456 
2457 static __net_exit void devinet_exit_net(struct net *net)
2458 {
2459 #ifdef CONFIG_SYSCTL
2460 	struct ctl_table *tbl;
2461 
2462 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2463 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2464 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2465 				    NETCONFA_IFINDEX_DEFAULT);
2466 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2467 				    NETCONFA_IFINDEX_ALL);
2468 	kfree(tbl);
2469 #endif
2470 	kfree(net->ipv4.devconf_dflt);
2471 	kfree(net->ipv4.devconf_all);
2472 }
2473 
2474 static __net_initdata struct pernet_operations devinet_ops = {
2475 	.init = devinet_init_net,
2476 	.exit = devinet_exit_net,
2477 };
2478 
2479 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2480 	.family		  = AF_INET,
2481 	.fill_link_af	  = inet_fill_link_af,
2482 	.get_link_af_size = inet_get_link_af_size,
2483 	.validate_link_af = inet_validate_link_af,
2484 	.set_link_af	  = inet_set_link_af,
2485 };
2486 
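/*
 * One-time initialization: address hash table, pernet ops, gifconf and
 * netdevice notifier hooks, the address-lifetime worker and the
 * rtnetlink address / netconf message handlers.
 */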
2487 void __init devinet_init(void)
2488 {
2489 	int i;
2490 
2491 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2492 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2493 
2494 	register_pernet_subsys(&devinet_ops);
2495 
2496 	register_gifconf(PF_INET, inet_gifconf);
2497 	register_netdevice_notifier(&ip_netdev_notifier);
2498 
2499 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2500 
2501 	rtnl_af_register(&inet_af_ops);
2502 
2503 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2504 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2505 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2506 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2507 		      inet_netconf_dump_devconf, 0);
2508 }
2509