xref: /openbmc/linux/net/ipv4/devinet.c (revision 9a29ad52)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
103 };
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	ASSERT_RTNL();
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124 
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127 	ASSERT_RTNL();
128 	hlist_del_init_rcu(&ifa->hash);
129 }
130 
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 
144 	rcu_read_lock();
145 	ifa = inet_lookup_ifaddr_rcu(net, addr);
146 	if (!ifa) {
147 		struct flowi4 fl4 = { .daddr = addr };
148 		struct fib_result res = { 0 };
149 		struct fib_table *local;
150 
151 		/* Fallback to FIB local table so that communication
152 		 * over loopback subnets work.
153 		 */
154 		local = fib_get_table(net, RT_TABLE_LOCAL);
155 		if (local &&
156 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
157 		    res.type == RTN_LOCAL)
158 			result = FIB_RES_DEV(res);
159 	} else {
160 		result = ifa->ifa_dev->dev;
161 	}
162 	if (result && devref)
163 		dev_hold(result);
164 	rcu_read_unlock();
165 	return result;
166 }
167 EXPORT_SYMBOL(__ip_dev_find);
168 
169 /* called under RCU lock */
170 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
171 {
172 	u32 hash = inet_addr_hash(net, addr);
173 	struct in_ifaddr *ifa;
174 
175 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
176 		if (ifa->ifa_local == addr &&
177 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
178 			return ifa;
179 
180 	return NULL;
181 }
182 
183 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
184 
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
186 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
187 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
188 			 int destroy);
189 #ifdef CONFIG_SYSCTL
190 static int devinet_sysctl_register(struct in_device *idev);
191 static void devinet_sysctl_unregister(struct in_device *idev);
192 #else
193 static int devinet_sysctl_register(struct in_device *idev)
194 {
195 	return 0;
196 }
197 static void devinet_sysctl_unregister(struct in_device *idev)
198 {
199 }
200 #endif
201 
202 /* Locks all the inet devices. */
203 
204 static struct in_ifaddr *inet_alloc_ifa(void)
205 {
206 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
207 }
208 
209 static void inet_rcu_free_ifa(struct rcu_head *head)
210 {
211 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
212 	if (ifa->ifa_dev)
213 		in_dev_put(ifa->ifa_dev);
214 	kfree(ifa);
215 }
216 
217 static void inet_free_ifa(struct in_ifaddr *ifa)
218 {
219 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
220 }
221 
222 void in_dev_finish_destroy(struct in_device *idev)
223 {
224 	struct net_device *dev = idev->dev;
225 
226 	WARN_ON(idev->ifa_list);
227 	WARN_ON(idev->mc_list);
228 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
229 #ifdef NET_REFCNT_DEBUG
230 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
231 #endif
232 	dev_put(dev);
233 	if (!idev->dead)
234 		pr_err("Freeing alive in_device %p\n", idev);
235 	else
236 		kfree(idev);
237 }
238 EXPORT_SYMBOL(in_dev_finish_destroy);
239 
240 static struct in_device *inetdev_init(struct net_device *dev)
241 {
242 	struct in_device *in_dev;
243 	int err = -ENOMEM;
244 
245 	ASSERT_RTNL();
246 
247 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
248 	if (!in_dev)
249 		goto out;
250 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
251 			sizeof(in_dev->cnf));
252 	in_dev->cnf.sysctl = NULL;
253 	in_dev->dev = dev;
254 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
255 	if (!in_dev->arp_parms)
256 		goto out_kfree;
257 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
258 		dev_disable_lro(dev);
259 	/* Reference in_dev->dev */
260 	dev_hold(dev);
261 	/* Account for reference dev->ip_ptr (below) */
262 	refcount_set(&in_dev->refcnt, 1);
263 
264 	err = devinet_sysctl_register(in_dev);
265 	if (err) {
266 		in_dev->dead = 1;
267 		in_dev_put(in_dev);
268 		in_dev = NULL;
269 		goto out;
270 	}
271 	ip_mc_init_dev(in_dev);
272 	if (dev->flags & IFF_UP)
273 		ip_mc_up(in_dev);
274 
275 	/* we can receive as soon as ip_ptr is set -- do this last */
276 	rcu_assign_pointer(dev->ip_ptr, in_dev);
277 out:
278 	return in_dev ?: ERR_PTR(err);
279 out_kfree:
280 	kfree(in_dev);
281 	in_dev = NULL;
282 	goto out;
283 }
284 
285 static void in_dev_rcu_put(struct rcu_head *head)
286 {
287 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
288 	in_dev_put(idev);
289 }
290 
291 static void inetdev_destroy(struct in_device *in_dev)
292 {
293 	struct in_ifaddr *ifa;
294 	struct net_device *dev;
295 
296 	ASSERT_RTNL();
297 
298 	dev = in_dev->dev;
299 
300 	in_dev->dead = 1;
301 
302 	ip_mc_destroy_dev(in_dev);
303 
304 	while ((ifa = in_dev->ifa_list) != NULL) {
305 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
306 		inet_free_ifa(ifa);
307 	}
308 
309 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
310 
311 	devinet_sysctl_unregister(in_dev);
312 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
313 	arp_ifdown(dev);
314 
315 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
316 }
317 
318 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
319 {
320 	rcu_read_lock();
321 	for_primary_ifa(in_dev) {
322 		if (inet_ifa_match(a, ifa)) {
323 			if (!b || inet_ifa_match(b, ifa)) {
324 				rcu_read_unlock();
325 				return 1;
326 			}
327 		}
328 	} endfor_ifa(in_dev);
329 	rcu_read_unlock();
330 	return 0;
331 }
332 
333 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334 			 int destroy, struct nlmsghdr *nlh, u32 portid)
335 {
336 	struct in_ifaddr *promote = NULL;
337 	struct in_ifaddr *ifa, *ifa1 = *ifap;
338 	struct in_ifaddr *last_prim = in_dev->ifa_list;
339 	struct in_ifaddr *prev_prom = NULL;
340 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
341 
342 	ASSERT_RTNL();
343 
344 	if (in_dev->dead)
345 		goto no_promotions;
346 
347 	/* 1. Deleting primary ifaddr forces deletion all secondaries
348 	 * unless alias promotion is set
349 	 **/
350 
351 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
352 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
353 
354 		while ((ifa = *ifap1) != NULL) {
355 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
356 			    ifa1->ifa_scope <= ifa->ifa_scope)
357 				last_prim = ifa;
358 
359 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
360 			    ifa1->ifa_mask != ifa->ifa_mask ||
361 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
362 				ifap1 = &ifa->ifa_next;
363 				prev_prom = ifa;
364 				continue;
365 			}
366 
367 			if (!do_promote) {
368 				inet_hash_remove(ifa);
369 				*ifap1 = ifa->ifa_next;
370 
371 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
372 				blocking_notifier_call_chain(&inetaddr_chain,
373 						NETDEV_DOWN, ifa);
374 				inet_free_ifa(ifa);
375 			} else {
376 				promote = ifa;
377 				break;
378 			}
379 		}
380 	}
381 
382 	/* On promotion all secondaries from subnet are changing
383 	 * the primary IP, we must remove all their routes silently
384 	 * and later to add them back with new prefsrc. Do this
385 	 * while all addresses are on the device list.
386 	 */
387 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
388 		if (ifa1->ifa_mask == ifa->ifa_mask &&
389 		    inet_ifa_match(ifa1->ifa_address, ifa))
390 			fib_del_ifaddr(ifa, ifa1);
391 	}
392 
393 no_promotions:
394 	/* 2. Unlink it */
395 
396 	*ifap = ifa1->ifa_next;
397 	inet_hash_remove(ifa1);
398 
399 	/* 3. Announce address deletion */
400 
401 	/* Send message first, then call notifier.
402 	   At first sight, FIB update triggered by notifier
403 	   will refer to already deleted ifaddr, that could confuse
404 	   netlink listeners. It is not true: look, gated sees
405 	   that route deleted and if it still thinks that ifaddr
406 	   is valid, it will try to restore deleted routes... Grr.
407 	   So that, this order is correct.
408 	 */
409 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
410 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
411 
412 	if (promote) {
413 		struct in_ifaddr *next_sec = promote->ifa_next;
414 
415 		if (prev_prom) {
416 			prev_prom->ifa_next = promote->ifa_next;
417 			promote->ifa_next = last_prim->ifa_next;
418 			last_prim->ifa_next = promote;
419 		}
420 
421 		promote->ifa_flags &= ~IFA_F_SECONDARY;
422 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
423 		blocking_notifier_call_chain(&inetaddr_chain,
424 				NETDEV_UP, promote);
425 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
426 			if (ifa1->ifa_mask != ifa->ifa_mask ||
427 			    !inet_ifa_match(ifa1->ifa_address, ifa))
428 					continue;
429 			fib_add_ifaddr(ifa);
430 		}
431 
432 	}
433 	if (destroy)
434 		inet_free_ifa(ifa1);
435 }
436 
437 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
438 			 int destroy)
439 {
440 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
441 }
442 
443 static void check_lifetime(struct work_struct *work);
444 
445 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
446 
447 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
448 			     u32 portid, struct netlink_ext_ack *extack)
449 {
450 	struct in_device *in_dev = ifa->ifa_dev;
451 	struct in_ifaddr *ifa1, **ifap, **last_primary;
452 	struct in_validator_info ivi;
453 	int ret;
454 
455 	ASSERT_RTNL();
456 
457 	if (!ifa->ifa_local) {
458 		inet_free_ifa(ifa);
459 		return 0;
460 	}
461 
462 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
463 	last_primary = &in_dev->ifa_list;
464 
465 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
466 	     ifap = &ifa1->ifa_next) {
467 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
468 		    ifa->ifa_scope <= ifa1->ifa_scope)
469 			last_primary = &ifa1->ifa_next;
470 		if (ifa1->ifa_mask == ifa->ifa_mask &&
471 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
472 			if (ifa1->ifa_local == ifa->ifa_local) {
473 				inet_free_ifa(ifa);
474 				return -EEXIST;
475 			}
476 			if (ifa1->ifa_scope != ifa->ifa_scope) {
477 				inet_free_ifa(ifa);
478 				return -EINVAL;
479 			}
480 			ifa->ifa_flags |= IFA_F_SECONDARY;
481 		}
482 	}
483 
484 	/* Allow any devices that wish to register ifaddr validtors to weigh
485 	 * in now, before changes are committed.  The rntl lock is serializing
486 	 * access here, so the state should not change between a validator call
487 	 * and a final notify on commit.  This isn't invoked on promotion under
488 	 * the assumption that validators are checking the address itself, and
489 	 * not the flags.
490 	 */
491 	ivi.ivi_addr = ifa->ifa_address;
492 	ivi.ivi_dev = ifa->ifa_dev;
493 	ivi.extack = extack;
494 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
495 					   NETDEV_UP, &ivi);
496 	ret = notifier_to_errno(ret);
497 	if (ret) {
498 		inet_free_ifa(ifa);
499 		return ret;
500 	}
501 
502 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
503 		prandom_seed((__force u32) ifa->ifa_local);
504 		ifap = last_primary;
505 	}
506 
507 	ifa->ifa_next = *ifap;
508 	*ifap = ifa;
509 
510 	inet_hash_insert(dev_net(in_dev->dev), ifa);
511 
512 	cancel_delayed_work(&check_lifetime_work);
513 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
514 
515 	/* Send message first, then call notifier.
516 	   Notifier will trigger FIB update, so that
517 	   listeners of netlink will know about new ifaddr */
518 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
519 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
520 
521 	return 0;
522 }
523 
524 static int inet_insert_ifa(struct in_ifaddr *ifa)
525 {
526 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
527 }
528 
529 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
530 {
531 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
532 
533 	ASSERT_RTNL();
534 
535 	if (!in_dev) {
536 		inet_free_ifa(ifa);
537 		return -ENOBUFS;
538 	}
539 	ipv4_devconf_setall(in_dev);
540 	neigh_parms_data_state_setall(in_dev->arp_parms);
541 	if (ifa->ifa_dev != in_dev) {
542 		WARN_ON(ifa->ifa_dev);
543 		in_dev_hold(in_dev);
544 		ifa->ifa_dev = in_dev;
545 	}
546 	if (ipv4_is_loopback(ifa->ifa_local))
547 		ifa->ifa_scope = RT_SCOPE_HOST;
548 	return inet_insert_ifa(ifa);
549 }
550 
551 /* Caller must hold RCU or RTNL :
552  * We dont take a reference on found in_device
553  */
554 struct in_device *inetdev_by_index(struct net *net, int ifindex)
555 {
556 	struct net_device *dev;
557 	struct in_device *in_dev = NULL;
558 
559 	rcu_read_lock();
560 	dev = dev_get_by_index_rcu(net, ifindex);
561 	if (dev)
562 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
563 	rcu_read_unlock();
564 	return in_dev;
565 }
566 EXPORT_SYMBOL(inetdev_by_index);
567 
568 /* Called only from RTNL semaphored context. No locks. */
569 
570 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
571 				    __be32 mask)
572 {
573 	ASSERT_RTNL();
574 
575 	for_primary_ifa(in_dev) {
576 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
577 			return ifa;
578 	} endfor_ifa(in_dev);
579 	return NULL;
580 }
581 
582 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
583 {
584 	struct ip_mreqn mreq = {
585 		.imr_multiaddr.s_addr = ifa->ifa_address,
586 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
587 	};
588 	int ret;
589 
590 	ASSERT_RTNL();
591 
592 	lock_sock(sk);
593 	if (join)
594 		ret = ip_mc_join_group(sk, &mreq);
595 	else
596 		ret = ip_mc_leave_group(sk, &mreq);
597 	release_sock(sk);
598 
599 	return ret;
600 }
601 
602 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
603 			    struct netlink_ext_ack *extack)
604 {
605 	struct net *net = sock_net(skb->sk);
606 	struct nlattr *tb[IFA_MAX+1];
607 	struct in_device *in_dev;
608 	struct ifaddrmsg *ifm;
609 	struct in_ifaddr *ifa, **ifap;
610 	int err = -EINVAL;
611 
612 	ASSERT_RTNL();
613 
614 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
615 			  extack);
616 	if (err < 0)
617 		goto errout;
618 
619 	ifm = nlmsg_data(nlh);
620 	in_dev = inetdev_by_index(net, ifm->ifa_index);
621 	if (!in_dev) {
622 		err = -ENODEV;
623 		goto errout;
624 	}
625 
626 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
627 	     ifap = &ifa->ifa_next) {
628 		if (tb[IFA_LOCAL] &&
629 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
630 			continue;
631 
632 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
633 			continue;
634 
635 		if (tb[IFA_ADDRESS] &&
636 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
637 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
638 			continue;
639 
640 		if (ipv4_is_multicast(ifa->ifa_address))
641 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
642 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
643 		return 0;
644 	}
645 
646 	err = -EADDRNOTAVAIL;
647 errout:
648 	return err;
649 }
650 
651 #define INFINITY_LIFE_TIME	0xFFFFFFFF
652 
653 static void check_lifetime(struct work_struct *work)
654 {
655 	unsigned long now, next, next_sec, next_sched;
656 	struct in_ifaddr *ifa;
657 	struct hlist_node *n;
658 	int i;
659 
660 	now = jiffies;
661 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
662 
663 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
664 		bool change_needed = false;
665 
666 		rcu_read_lock();
667 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
668 			unsigned long age;
669 
670 			if (ifa->ifa_flags & IFA_F_PERMANENT)
671 				continue;
672 
673 			/* We try to batch several events at once. */
674 			age = (now - ifa->ifa_tstamp +
675 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
676 
677 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
678 			    age >= ifa->ifa_valid_lft) {
679 				change_needed = true;
680 			} else if (ifa->ifa_preferred_lft ==
681 				   INFINITY_LIFE_TIME) {
682 				continue;
683 			} else if (age >= ifa->ifa_preferred_lft) {
684 				if (time_before(ifa->ifa_tstamp +
685 						ifa->ifa_valid_lft * HZ, next))
686 					next = ifa->ifa_tstamp +
687 					       ifa->ifa_valid_lft * HZ;
688 
689 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
690 					change_needed = true;
691 			} else if (time_before(ifa->ifa_tstamp +
692 					       ifa->ifa_preferred_lft * HZ,
693 					       next)) {
694 				next = ifa->ifa_tstamp +
695 				       ifa->ifa_preferred_lft * HZ;
696 			}
697 		}
698 		rcu_read_unlock();
699 		if (!change_needed)
700 			continue;
701 		rtnl_lock();
702 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
703 			unsigned long age;
704 
705 			if (ifa->ifa_flags & IFA_F_PERMANENT)
706 				continue;
707 
708 			/* We try to batch several events at once. */
709 			age = (now - ifa->ifa_tstamp +
710 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
711 
712 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
713 			    age >= ifa->ifa_valid_lft) {
714 				struct in_ifaddr **ifap;
715 
716 				for (ifap = &ifa->ifa_dev->ifa_list;
717 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
718 					if (*ifap == ifa) {
719 						inet_del_ifa(ifa->ifa_dev,
720 							     ifap, 1);
721 						break;
722 					}
723 				}
724 			} else if (ifa->ifa_preferred_lft !=
725 				   INFINITY_LIFE_TIME &&
726 				   age >= ifa->ifa_preferred_lft &&
727 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
728 				ifa->ifa_flags |= IFA_F_DEPRECATED;
729 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
730 			}
731 		}
732 		rtnl_unlock();
733 	}
734 
735 	next_sec = round_jiffies_up(next);
736 	next_sched = next;
737 
738 	/* If rounded timeout is accurate enough, accept it. */
739 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
740 		next_sched = next_sec;
741 
742 	now = jiffies;
743 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
744 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
745 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
746 
747 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
748 			next_sched - now);
749 }
750 
751 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
752 			     __u32 prefered_lft)
753 {
754 	unsigned long timeout;
755 
756 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
757 
758 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
759 	if (addrconf_finite_timeout(timeout))
760 		ifa->ifa_valid_lft = timeout;
761 	else
762 		ifa->ifa_flags |= IFA_F_PERMANENT;
763 
764 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
765 	if (addrconf_finite_timeout(timeout)) {
766 		if (timeout == 0)
767 			ifa->ifa_flags |= IFA_F_DEPRECATED;
768 		ifa->ifa_preferred_lft = timeout;
769 	}
770 	ifa->ifa_tstamp = jiffies;
771 	if (!ifa->ifa_cstamp)
772 		ifa->ifa_cstamp = ifa->ifa_tstamp;
773 }
774 
775 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
776 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
777 {
778 	struct nlattr *tb[IFA_MAX+1];
779 	struct in_ifaddr *ifa;
780 	struct ifaddrmsg *ifm;
781 	struct net_device *dev;
782 	struct in_device *in_dev;
783 	int err;
784 
785 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
786 			  NULL);
787 	if (err < 0)
788 		goto errout;
789 
790 	ifm = nlmsg_data(nlh);
791 	err = -EINVAL;
792 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
793 		goto errout;
794 
795 	dev = __dev_get_by_index(net, ifm->ifa_index);
796 	err = -ENODEV;
797 	if (!dev)
798 		goto errout;
799 
800 	in_dev = __in_dev_get_rtnl(dev);
801 	err = -ENOBUFS;
802 	if (!in_dev)
803 		goto errout;
804 
805 	ifa = inet_alloc_ifa();
806 	if (!ifa)
807 		/*
808 		 * A potential indev allocation can be left alive, it stays
809 		 * assigned to its device and is destroy with it.
810 		 */
811 		goto errout;
812 
813 	ipv4_devconf_setall(in_dev);
814 	neigh_parms_data_state_setall(in_dev->arp_parms);
815 	in_dev_hold(in_dev);
816 
817 	if (!tb[IFA_ADDRESS])
818 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
819 
820 	INIT_HLIST_NODE(&ifa->hash);
821 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
822 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
823 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
824 					 ifm->ifa_flags;
825 	ifa->ifa_scope = ifm->ifa_scope;
826 	ifa->ifa_dev = in_dev;
827 
828 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
829 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
830 
831 	if (tb[IFA_BROADCAST])
832 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
833 
834 	if (tb[IFA_LABEL])
835 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
836 	else
837 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
838 
839 	if (tb[IFA_RT_PRIORITY])
840 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
841 
842 	if (tb[IFA_CACHEINFO]) {
843 		struct ifa_cacheinfo *ci;
844 
845 		ci = nla_data(tb[IFA_CACHEINFO]);
846 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
847 			err = -EINVAL;
848 			goto errout_free;
849 		}
850 		*pvalid_lft = ci->ifa_valid;
851 		*pprefered_lft = ci->ifa_prefered;
852 	}
853 
854 	return ifa;
855 
856 errout_free:
857 	inet_free_ifa(ifa);
858 errout:
859 	return ERR_PTR(err);
860 }
861 
862 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
863 {
864 	struct in_device *in_dev = ifa->ifa_dev;
865 	struct in_ifaddr *ifa1, **ifap;
866 
867 	if (!ifa->ifa_local)
868 		return NULL;
869 
870 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
871 	     ifap = &ifa1->ifa_next) {
872 		if (ifa1->ifa_mask == ifa->ifa_mask &&
873 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
874 		    ifa1->ifa_local == ifa->ifa_local)
875 			return ifa1;
876 	}
877 	return NULL;
878 }
879 
880 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
881 			    struct netlink_ext_ack *extack)
882 {
883 	struct net *net = sock_net(skb->sk);
884 	struct in_ifaddr *ifa;
885 	struct in_ifaddr *ifa_existing;
886 	__u32 valid_lft = INFINITY_LIFE_TIME;
887 	__u32 prefered_lft = INFINITY_LIFE_TIME;
888 
889 	ASSERT_RTNL();
890 
891 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
892 	if (IS_ERR(ifa))
893 		return PTR_ERR(ifa);
894 
895 	ifa_existing = find_matching_ifa(ifa);
896 	if (!ifa_existing) {
897 		/* It would be best to check for !NLM_F_CREATE here but
898 		 * userspace already relies on not having to provide this.
899 		 */
900 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
901 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
902 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
903 					       true, ifa);
904 
905 			if (ret < 0) {
906 				inet_free_ifa(ifa);
907 				return ret;
908 			}
909 		}
910 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
911 					 extack);
912 	} else {
913 		u32 new_metric = ifa->ifa_rt_priority;
914 
915 		inet_free_ifa(ifa);
916 
917 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
918 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
919 			return -EEXIST;
920 		ifa = ifa_existing;
921 
922 		if (ifa->ifa_rt_priority != new_metric) {
923 			fib_modify_prefix_metric(ifa, new_metric);
924 			ifa->ifa_rt_priority = new_metric;
925 		}
926 
927 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
928 		cancel_delayed_work(&check_lifetime_work);
929 		queue_delayed_work(system_power_efficient_wq,
930 				&check_lifetime_work, 0);
931 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
932 	}
933 	return 0;
934 }
935 
936 /*
937  *	Determine a default network mask, based on the IP address.
938  */
939 
940 static int inet_abc_len(__be32 addr)
941 {
942 	int rc = -1;	/* Something else, probably a multicast. */
943 
944 	if (ipv4_is_zeronet(addr))
945 		rc = 0;
946 	else {
947 		__u32 haddr = ntohl(addr);
948 
949 		if (IN_CLASSA(haddr))
950 			rc = 8;
951 		else if (IN_CLASSB(haddr))
952 			rc = 16;
953 		else if (IN_CLASSC(haddr))
954 			rc = 24;
955 	}
956 
957 	return rc;
958 }
959 
960 
961 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
962 {
963 	struct sockaddr_in sin_orig;
964 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
965 	struct in_device *in_dev;
966 	struct in_ifaddr **ifap = NULL;
967 	struct in_ifaddr *ifa = NULL;
968 	struct net_device *dev;
969 	char *colon;
970 	int ret = -EFAULT;
971 	int tryaddrmatch = 0;
972 
973 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
974 
975 	/* save original address for comparison */
976 	memcpy(&sin_orig, sin, sizeof(*sin));
977 
978 	colon = strchr(ifr->ifr_name, ':');
979 	if (colon)
980 		*colon = 0;
981 
982 	dev_load(net, ifr->ifr_name);
983 
984 	switch (cmd) {
985 	case SIOCGIFADDR:	/* Get interface address */
986 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
987 	case SIOCGIFDSTADDR:	/* Get the destination address */
988 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
989 		/* Note that these ioctls will not sleep,
990 		   so that we do not impose a lock.
991 		   One day we will be forced to put shlock here (I mean SMP)
992 		 */
993 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
994 		memset(sin, 0, sizeof(*sin));
995 		sin->sin_family = AF_INET;
996 		break;
997 
998 	case SIOCSIFFLAGS:
999 		ret = -EPERM;
1000 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1001 			goto out;
1002 		break;
1003 	case SIOCSIFADDR:	/* Set interface address (and family) */
1004 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1005 	case SIOCSIFDSTADDR:	/* Set the destination address */
1006 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1007 		ret = -EPERM;
1008 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009 			goto out;
1010 		ret = -EINVAL;
1011 		if (sin->sin_family != AF_INET)
1012 			goto out;
1013 		break;
1014 	default:
1015 		ret = -EINVAL;
1016 		goto out;
1017 	}
1018 
1019 	rtnl_lock();
1020 
1021 	ret = -ENODEV;
1022 	dev = __dev_get_by_name(net, ifr->ifr_name);
1023 	if (!dev)
1024 		goto done;
1025 
1026 	if (colon)
1027 		*colon = ':';
1028 
1029 	in_dev = __in_dev_get_rtnl(dev);
1030 	if (in_dev) {
1031 		if (tryaddrmatch) {
1032 			/* Matthias Andree */
1033 			/* compare label and address (4.4BSD style) */
1034 			/* note: we only do this for a limited set of ioctls
1035 			   and only if the original address family was AF_INET.
1036 			   This is checked above. */
1037 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038 			     ifap = &ifa->ifa_next) {
1039 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1040 				    sin_orig.sin_addr.s_addr ==
1041 							ifa->ifa_local) {
1042 					break; /* found */
1043 				}
1044 			}
1045 		}
1046 		/* we didn't get a match, maybe the application is
1047 		   4.3BSD-style and passed in junk so we fall back to
1048 		   comparing just the label */
1049 		if (!ifa) {
1050 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1051 			     ifap = &ifa->ifa_next)
1052 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1053 					break;
1054 		}
1055 	}
1056 
1057 	ret = -EADDRNOTAVAIL;
1058 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1059 		goto done;
1060 
1061 	switch (cmd) {
1062 	case SIOCGIFADDR:	/* Get interface address */
1063 		ret = 0;
1064 		sin->sin_addr.s_addr = ifa->ifa_local;
1065 		break;
1066 
1067 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1068 		ret = 0;
1069 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1070 		break;
1071 
1072 	case SIOCGIFDSTADDR:	/* Get the destination address */
1073 		ret = 0;
1074 		sin->sin_addr.s_addr = ifa->ifa_address;
1075 		break;
1076 
1077 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1078 		ret = 0;
1079 		sin->sin_addr.s_addr = ifa->ifa_mask;
1080 		break;
1081 
1082 	case SIOCSIFFLAGS:
1083 		if (colon) {
1084 			ret = -EADDRNOTAVAIL;
1085 			if (!ifa)
1086 				break;
1087 			ret = 0;
1088 			if (!(ifr->ifr_flags & IFF_UP))
1089 				inet_del_ifa(in_dev, ifap, 1);
1090 			break;
1091 		}
1092 		ret = dev_change_flags(dev, ifr->ifr_flags);
1093 		break;
1094 
1095 	case SIOCSIFADDR:	/* Set interface address (and family) */
1096 		ret = -EINVAL;
1097 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1098 			break;
1099 
1100 		if (!ifa) {
1101 			ret = -ENOBUFS;
1102 			ifa = inet_alloc_ifa();
1103 			if (!ifa)
1104 				break;
1105 			INIT_HLIST_NODE(&ifa->hash);
1106 			if (colon)
1107 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1108 			else
1109 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1110 		} else {
1111 			ret = 0;
1112 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1113 				break;
1114 			inet_del_ifa(in_dev, ifap, 0);
1115 			ifa->ifa_broadcast = 0;
1116 			ifa->ifa_scope = 0;
1117 		}
1118 
1119 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1120 
1121 		if (!(dev->flags & IFF_POINTOPOINT)) {
1122 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1123 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1124 			if ((dev->flags & IFF_BROADCAST) &&
1125 			    ifa->ifa_prefixlen < 31)
1126 				ifa->ifa_broadcast = ifa->ifa_address |
1127 						     ~ifa->ifa_mask;
1128 		} else {
1129 			ifa->ifa_prefixlen = 32;
1130 			ifa->ifa_mask = inet_make_mask(32);
1131 		}
1132 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1133 		ret = inet_set_ifa(dev, ifa);
1134 		break;
1135 
1136 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1137 		ret = 0;
1138 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1139 			inet_del_ifa(in_dev, ifap, 0);
1140 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1141 			inet_insert_ifa(ifa);
1142 		}
1143 		break;
1144 
1145 	case SIOCSIFDSTADDR:	/* Set the destination address */
1146 		ret = 0;
1147 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1148 			break;
1149 		ret = -EINVAL;
1150 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1151 			break;
1152 		ret = 0;
1153 		inet_del_ifa(in_dev, ifap, 0);
1154 		ifa->ifa_address = sin->sin_addr.s_addr;
1155 		inet_insert_ifa(ifa);
1156 		break;
1157 
1158 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1159 
1160 		/*
1161 		 *	The mask we set must be legal.
1162 		 */
1163 		ret = -EINVAL;
1164 		if (bad_mask(sin->sin_addr.s_addr, 0))
1165 			break;
1166 		ret = 0;
1167 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1168 			__be32 old_mask = ifa->ifa_mask;
1169 			inet_del_ifa(in_dev, ifap, 0);
1170 			ifa->ifa_mask = sin->sin_addr.s_addr;
1171 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1172 
1173 			/* See if current broadcast address matches
1174 			 * with current netmask, then recalculate
1175 			 * the broadcast address. Otherwise it's a
1176 			 * funny address, so don't touch it since
1177 			 * the user seems to know what (s)he's doing...
1178 			 */
1179 			if ((dev->flags & IFF_BROADCAST) &&
1180 			    (ifa->ifa_prefixlen < 31) &&
1181 			    (ifa->ifa_broadcast ==
1182 			     (ifa->ifa_local|~old_mask))) {
1183 				ifa->ifa_broadcast = (ifa->ifa_local |
1184 						      ~sin->sin_addr.s_addr);
1185 			}
1186 			inet_insert_ifa(ifa);
1187 		}
1188 		break;
1189 	}
1190 done:
1191 	rtnl_unlock();
1192 out:
1193 	return ret;
1194 }
1195 
1196 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1197 {
1198 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1199 	struct in_ifaddr *ifa;
1200 	struct ifreq ifr;
1201 	int done = 0;
1202 
1203 	if (WARN_ON(size > sizeof(struct ifreq)))
1204 		goto out;
1205 
1206 	if (!in_dev)
1207 		goto out;
1208 
1209 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1210 		if (!buf) {
1211 			done += size;
1212 			continue;
1213 		}
1214 		if (len < size)
1215 			break;
1216 		memset(&ifr, 0, sizeof(struct ifreq));
1217 		strcpy(ifr.ifr_name, ifa->ifa_label);
1218 
1219 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1220 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1221 								ifa->ifa_local;
1222 
1223 		if (copy_to_user(buf + done, &ifr, size)) {
1224 			done = -EFAULT;
1225 			break;
1226 		}
1227 		len  -= size;
1228 		done += size;
1229 	}
1230 out:
1231 	return done;
1232 }
1233 
1234 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1235 				 int scope)
1236 {
1237 	for_primary_ifa(in_dev) {
1238 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1239 		    ifa->ifa_scope <= scope)
1240 			return ifa->ifa_local;
1241 	} endfor_ifa(in_dev);
1242 
1243 	return 0;
1244 }
1245 
1246 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1247 {
1248 	__be32 addr = 0;
1249 	struct in_device *in_dev;
1250 	struct net *net = dev_net(dev);
1251 	int master_idx;
1252 
1253 	rcu_read_lock();
1254 	in_dev = __in_dev_get_rcu(dev);
1255 	if (!in_dev)
1256 		goto no_in_dev;
1257 
1258 	for_primary_ifa(in_dev) {
1259 		if (ifa->ifa_scope > scope)
1260 			continue;
1261 		if (!dst || inet_ifa_match(dst, ifa)) {
1262 			addr = ifa->ifa_local;
1263 			break;
1264 		}
1265 		if (!addr)
1266 			addr = ifa->ifa_local;
1267 	} endfor_ifa(in_dev);
1268 
1269 	if (addr)
1270 		goto out_unlock;
1271 no_in_dev:
1272 	master_idx = l3mdev_master_ifindex_rcu(dev);
1273 
1274 	/* For VRFs, the VRF device takes the place of the loopback device,
1275 	 * with addresses on it being preferred.  Note in such cases the
1276 	 * loopback device will be among the devices that fail the master_idx
1277 	 * equality check in the loop below.
1278 	 */
1279 	if (master_idx &&
1280 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1281 	    (in_dev = __in_dev_get_rcu(dev))) {
1282 		addr = in_dev_select_addr(in_dev, scope);
1283 		if (addr)
1284 			goto out_unlock;
1285 	}
1286 
1287 	/* Not loopback addresses on loopback should be preferred
1288 	   in this case. It is important that lo is the first interface
1289 	   in dev_base list.
1290 	 */
1291 	for_each_netdev_rcu(net, dev) {
1292 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1293 			continue;
1294 
1295 		in_dev = __in_dev_get_rcu(dev);
1296 		if (!in_dev)
1297 			continue;
1298 
1299 		addr = in_dev_select_addr(in_dev, scope);
1300 		if (addr)
1301 			goto out_unlock;
1302 	}
1303 out_unlock:
1304 	rcu_read_unlock();
1305 	return addr;
1306 }
1307 EXPORT_SYMBOL(inet_select_addr);
1308 
1309 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1310 			      __be32 local, int scope)
1311 {
1312 	int same = 0;
1313 	__be32 addr = 0;
1314 
1315 	for_ifa(in_dev) {
1316 		if (!addr &&
1317 		    (local == ifa->ifa_local || !local) &&
1318 		    ifa->ifa_scope <= scope) {
1319 			addr = ifa->ifa_local;
1320 			if (same)
1321 				break;
1322 		}
1323 		if (!same) {
1324 			same = (!local || inet_ifa_match(local, ifa)) &&
1325 				(!dst || inet_ifa_match(dst, ifa));
1326 			if (same && addr) {
1327 				if (local || !dst)
1328 					break;
1329 				/* Is the selected addr into dst subnet? */
1330 				if (inet_ifa_match(addr, ifa))
1331 					break;
1332 				/* No, then can we use new local src? */
1333 				if (ifa->ifa_scope <= scope) {
1334 					addr = ifa->ifa_local;
1335 					break;
1336 				}
1337 				/* search for large dst subnet for addr */
1338 				same = 0;
1339 			}
1340 		}
1341 	} endfor_ifa(in_dev);
1342 
1343 	return same ? addr : 0;
1344 }
1345 
1346 /*
1347  * Confirm that local IP address exists using wildcards:
1348  * - net: netns to check, cannot be NULL
1349  * - in_dev: only on this interface, NULL=any interface
1350  * - dst: only in the same subnet as dst, 0=any dst
1351  * - local: address, 0=autoselect the local address
1352  * - scope: maximum allowed scope value for the local address
1353  */
1354 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1355 			 __be32 dst, __be32 local, int scope)
1356 {
1357 	__be32 addr = 0;
1358 	struct net_device *dev;
1359 
1360 	if (in_dev)
1361 		return confirm_addr_indev(in_dev, dst, local, scope);
1362 
1363 	rcu_read_lock();
1364 	for_each_netdev_rcu(net, dev) {
1365 		in_dev = __in_dev_get_rcu(dev);
1366 		if (in_dev) {
1367 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1368 			if (addr)
1369 				break;
1370 		}
1371 	}
1372 	rcu_read_unlock();
1373 
1374 	return addr;
1375 }
1376 EXPORT_SYMBOL(inet_confirm_addr);
1377 
1378 /*
1379  *	Device notifier
1380  */
1381 
1382 int register_inetaddr_notifier(struct notifier_block *nb)
1383 {
1384 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1385 }
1386 EXPORT_SYMBOL(register_inetaddr_notifier);
1387 
1388 int unregister_inetaddr_notifier(struct notifier_block *nb)
1389 {
1390 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1391 }
1392 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1393 
1394 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1395 {
1396 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1399 
1400 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1401 {
1402 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1403 	    nb);
1404 }
1405 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1406 
1407 /* Rename ifa_labels for a device name change. Make some effort to preserve
1408  * existing alias numbering and to create unique labels if possible.
1409 */
1410 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1411 {
1412 	struct in_ifaddr *ifa;
1413 	int named = 0;
1414 
1415 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1416 		char old[IFNAMSIZ], *dot;
1417 
1418 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1419 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1420 		if (named++ == 0)
1421 			goto skip;
1422 		dot = strchr(old, ':');
1423 		if (!dot) {
1424 			sprintf(old, ":%d", named);
1425 			dot = old;
1426 		}
1427 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1428 			strcat(ifa->ifa_label, dot);
1429 		else
1430 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1431 skip:
1432 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1433 	}
1434 }
1435 
1436 static bool inetdev_valid_mtu(unsigned int mtu)
1437 {
1438 	return mtu >= IPV4_MIN_MTU;
1439 }
1440 
1441 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1442 					struct in_device *in_dev)
1443 
1444 {
1445 	struct in_ifaddr *ifa;
1446 
1447 	for (ifa = in_dev->ifa_list; ifa;
1448 	     ifa = ifa->ifa_next) {
1449 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1450 			 ifa->ifa_local, dev,
1451 			 ifa->ifa_local, NULL,
1452 			 dev->dev_addr, NULL);
1453 	}
1454 }
1455 
1456 /* Called only under RTNL semaphore */
1457 
1458 static int inetdev_event(struct notifier_block *this, unsigned long event,
1459 			 void *ptr)
1460 {
1461 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1462 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1463 
1464 	ASSERT_RTNL();
1465 
1466 	if (!in_dev) {
1467 		if (event == NETDEV_REGISTER) {
1468 			in_dev = inetdev_init(dev);
1469 			if (IS_ERR(in_dev))
1470 				return notifier_from_errno(PTR_ERR(in_dev));
1471 			if (dev->flags & IFF_LOOPBACK) {
1472 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1473 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1474 			}
1475 		} else if (event == NETDEV_CHANGEMTU) {
1476 			/* Re-enabling IP */
1477 			if (inetdev_valid_mtu(dev->mtu))
1478 				in_dev = inetdev_init(dev);
1479 		}
1480 		goto out;
1481 	}
1482 
1483 	switch (event) {
1484 	case NETDEV_REGISTER:
1485 		pr_debug("%s: bug\n", __func__);
1486 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1487 		break;
1488 	case NETDEV_UP:
1489 		if (!inetdev_valid_mtu(dev->mtu))
1490 			break;
1491 		if (dev->flags & IFF_LOOPBACK) {
1492 			struct in_ifaddr *ifa = inet_alloc_ifa();
1493 
1494 			if (ifa) {
1495 				INIT_HLIST_NODE(&ifa->hash);
1496 				ifa->ifa_local =
1497 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1498 				ifa->ifa_prefixlen = 8;
1499 				ifa->ifa_mask = inet_make_mask(8);
1500 				in_dev_hold(in_dev);
1501 				ifa->ifa_dev = in_dev;
1502 				ifa->ifa_scope = RT_SCOPE_HOST;
1503 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1504 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1505 						 INFINITY_LIFE_TIME);
1506 				ipv4_devconf_setall(in_dev);
1507 				neigh_parms_data_state_setall(in_dev->arp_parms);
1508 				inet_insert_ifa(ifa);
1509 			}
1510 		}
1511 		ip_mc_up(in_dev);
1512 		/* fall through */
1513 	case NETDEV_CHANGEADDR:
1514 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1515 			break;
1516 		/* fall through */
1517 	case NETDEV_NOTIFY_PEERS:
1518 		/* Send gratuitous ARP to notify of link change */
1519 		inetdev_send_gratuitous_arp(dev, in_dev);
1520 		break;
1521 	case NETDEV_DOWN:
1522 		ip_mc_down(in_dev);
1523 		break;
1524 	case NETDEV_PRE_TYPE_CHANGE:
1525 		ip_mc_unmap(in_dev);
1526 		break;
1527 	case NETDEV_POST_TYPE_CHANGE:
1528 		ip_mc_remap(in_dev);
1529 		break;
1530 	case NETDEV_CHANGEMTU:
1531 		if (inetdev_valid_mtu(dev->mtu))
1532 			break;
1533 		/* disable IP when MTU is not enough */
1534 		/* fall through */
1535 	case NETDEV_UNREGISTER:
1536 		inetdev_destroy(in_dev);
1537 		break;
1538 	case NETDEV_CHANGENAME:
1539 		/* Do not notify about label change, this event is
1540 		 * not interesting to applications using netlink.
1541 		 */
1542 		inetdev_changename(dev, in_dev);
1543 
1544 		devinet_sysctl_unregister(in_dev);
1545 		devinet_sysctl_register(in_dev);
1546 		break;
1547 	}
1548 out:
1549 	return NOTIFY_DONE;
1550 }
1551 
1552 static struct notifier_block ip_netdev_notifier = {
1553 	.notifier_call = inetdev_event,
1554 };
1555 
1556 static size_t inet_nlmsg_size(void)
1557 {
1558 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1559 	       + nla_total_size(4) /* IFA_ADDRESS */
1560 	       + nla_total_size(4) /* IFA_LOCAL */
1561 	       + nla_total_size(4) /* IFA_BROADCAST */
1562 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1563 	       + nla_total_size(4)  /* IFA_FLAGS */
1564 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1565 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1566 }
1567 
1568 static inline u32 cstamp_delta(unsigned long cstamp)
1569 {
1570 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1571 }
1572 
1573 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1574 			 unsigned long tstamp, u32 preferred, u32 valid)
1575 {
1576 	struct ifa_cacheinfo ci;
1577 
1578 	ci.cstamp = cstamp_delta(cstamp);
1579 	ci.tstamp = cstamp_delta(tstamp);
1580 	ci.ifa_prefered = preferred;
1581 	ci.ifa_valid = valid;
1582 
1583 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1584 }
1585 
1586 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1587 			    u32 portid, u32 seq, int event, unsigned int flags)
1588 {
1589 	struct ifaddrmsg *ifm;
1590 	struct nlmsghdr  *nlh;
1591 	u32 preferred, valid;
1592 
1593 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1594 	if (!nlh)
1595 		return -EMSGSIZE;
1596 
1597 	ifm = nlmsg_data(nlh);
1598 	ifm->ifa_family = AF_INET;
1599 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1600 	ifm->ifa_flags = ifa->ifa_flags;
1601 	ifm->ifa_scope = ifa->ifa_scope;
1602 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1603 
1604 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1605 		preferred = ifa->ifa_preferred_lft;
1606 		valid = ifa->ifa_valid_lft;
1607 		if (preferred != INFINITY_LIFE_TIME) {
1608 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1609 
1610 			if (preferred > tval)
1611 				preferred -= tval;
1612 			else
1613 				preferred = 0;
1614 			if (valid != INFINITY_LIFE_TIME) {
1615 				if (valid > tval)
1616 					valid -= tval;
1617 				else
1618 					valid = 0;
1619 			}
1620 		}
1621 	} else {
1622 		preferred = INFINITY_LIFE_TIME;
1623 		valid = INFINITY_LIFE_TIME;
1624 	}
1625 	if ((ifa->ifa_address &&
1626 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1627 	    (ifa->ifa_local &&
1628 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1629 	    (ifa->ifa_broadcast &&
1630 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1631 	    (ifa->ifa_label[0] &&
1632 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1633 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1634 	    (ifa->ifa_rt_priority &&
1635 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1636 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1637 			  preferred, valid))
1638 		goto nla_put_failure;
1639 
1640 	nlmsg_end(skb, nlh);
1641 	return 0;
1642 
1643 nla_put_failure:
1644 	nlmsg_cancel(skb, nlh);
1645 	return -EMSGSIZE;
1646 }
1647 
1648 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1649 {
1650 	struct net *net = sock_net(skb->sk);
1651 	int h, s_h;
1652 	int idx, s_idx;
1653 	int ip_idx, s_ip_idx;
1654 	struct net_device *dev;
1655 	struct in_device *in_dev;
1656 	struct in_ifaddr *ifa;
1657 	struct hlist_head *head;
1658 
1659 	s_h = cb->args[0];
1660 	s_idx = idx = cb->args[1];
1661 	s_ip_idx = ip_idx = cb->args[2];
1662 
1663 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1664 		idx = 0;
1665 		head = &net->dev_index_head[h];
1666 		rcu_read_lock();
1667 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1668 			  net->dev_base_seq;
1669 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1670 			if (idx < s_idx)
1671 				goto cont;
1672 			if (h > s_h || idx > s_idx)
1673 				s_ip_idx = 0;
1674 			in_dev = __in_dev_get_rcu(dev);
1675 			if (!in_dev)
1676 				goto cont;
1677 
1678 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1679 			     ifa = ifa->ifa_next, ip_idx++) {
1680 				if (ip_idx < s_ip_idx)
1681 					continue;
1682 				if (inet_fill_ifaddr(skb, ifa,
1683 					     NETLINK_CB(cb->skb).portid,
1684 					     cb->nlh->nlmsg_seq,
1685 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1686 					rcu_read_unlock();
1687 					goto done;
1688 				}
1689 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1690 			}
1691 cont:
1692 			idx++;
1693 		}
1694 		rcu_read_unlock();
1695 	}
1696 
1697 done:
1698 	cb->args[0] = h;
1699 	cb->args[1] = idx;
1700 	cb->args[2] = ip_idx;
1701 
1702 	return skb->len;
1703 }
1704 
1705 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1706 		      u32 portid)
1707 {
1708 	struct sk_buff *skb;
1709 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1710 	int err = -ENOBUFS;
1711 	struct net *net;
1712 
1713 	net = dev_net(ifa->ifa_dev->dev);
1714 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1715 	if (!skb)
1716 		goto errout;
1717 
1718 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1719 	if (err < 0) {
1720 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1721 		WARN_ON(err == -EMSGSIZE);
1722 		kfree_skb(skb);
1723 		goto errout;
1724 	}
1725 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1726 	return;
1727 errout:
1728 	if (err < 0)
1729 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1730 }
1731 
1732 static size_t inet_get_link_af_size(const struct net_device *dev,
1733 				    u32 ext_filter_mask)
1734 {
1735 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1736 
1737 	if (!in_dev)
1738 		return 0;
1739 
1740 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1741 }
1742 
1743 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1744 			     u32 ext_filter_mask)
1745 {
1746 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1747 	struct nlattr *nla;
1748 	int i;
1749 
1750 	if (!in_dev)
1751 		return -ENODATA;
1752 
1753 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1754 	if (!nla)
1755 		return -EMSGSIZE;
1756 
1757 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1758 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1759 
1760 	return 0;
1761 }
1762 
1763 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1764 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1765 };
1766 
1767 static int inet_validate_link_af(const struct net_device *dev,
1768 				 const struct nlattr *nla)
1769 {
1770 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1771 	int err, rem;
1772 
1773 	if (dev && !__in_dev_get_rcu(dev))
1774 		return -EAFNOSUPPORT;
1775 
1776 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1777 	if (err < 0)
1778 		return err;
1779 
1780 	if (tb[IFLA_INET_CONF]) {
1781 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1782 			int cfgid = nla_type(a);
1783 
1784 			if (nla_len(a) < 4)
1785 				return -EINVAL;
1786 
1787 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1788 				return -EINVAL;
1789 		}
1790 	}
1791 
1792 	return 0;
1793 }
1794 
1795 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1796 {
1797 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1798 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1799 	int rem;
1800 
1801 	if (!in_dev)
1802 		return -EAFNOSUPPORT;
1803 
1804 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1805 		BUG();
1806 
1807 	if (tb[IFLA_INET_CONF]) {
1808 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1809 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1810 	}
1811 
1812 	return 0;
1813 }
1814 
1815 static int inet_netconf_msgsize_devconf(int type)
1816 {
1817 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1818 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1819 	bool all = false;
1820 
1821 	if (type == NETCONFA_ALL)
1822 		all = true;
1823 
1824 	if (all || type == NETCONFA_FORWARDING)
1825 		size += nla_total_size(4);
1826 	if (all || type == NETCONFA_RP_FILTER)
1827 		size += nla_total_size(4);
1828 	if (all || type == NETCONFA_MC_FORWARDING)
1829 		size += nla_total_size(4);
1830 	if (all || type == NETCONFA_PROXY_NEIGH)
1831 		size += nla_total_size(4);
1832 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1833 		size += nla_total_size(4);
1834 
1835 	return size;
1836 }
1837 
1838 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1839 				     struct ipv4_devconf *devconf, u32 portid,
1840 				     u32 seq, int event, unsigned int flags,
1841 				     int type)
1842 {
1843 	struct nlmsghdr  *nlh;
1844 	struct netconfmsg *ncm;
1845 	bool all = false;
1846 
1847 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1848 			flags);
1849 	if (!nlh)
1850 		return -EMSGSIZE;
1851 
1852 	if (type == NETCONFA_ALL)
1853 		all = true;
1854 
1855 	ncm = nlmsg_data(nlh);
1856 	ncm->ncm_family = AF_INET;
1857 
1858 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1859 		goto nla_put_failure;
1860 
1861 	if (!devconf)
1862 		goto out;
1863 
1864 	if ((all || type == NETCONFA_FORWARDING) &&
1865 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1866 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1867 		goto nla_put_failure;
1868 	if ((all || type == NETCONFA_RP_FILTER) &&
1869 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1870 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1871 		goto nla_put_failure;
1872 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1873 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1874 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1875 		goto nla_put_failure;
1876 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1877 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1878 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1879 		goto nla_put_failure;
1880 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1881 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1882 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1883 		goto nla_put_failure;
1884 
1885 out:
1886 	nlmsg_end(skb, nlh);
1887 	return 0;
1888 
1889 nla_put_failure:
1890 	nlmsg_cancel(skb, nlh);
1891 	return -EMSGSIZE;
1892 }
1893 
1894 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1895 				 int ifindex, struct ipv4_devconf *devconf)
1896 {
1897 	struct sk_buff *skb;
1898 	int err = -ENOBUFS;
1899 
1900 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1901 	if (!skb)
1902 		goto errout;
1903 
1904 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1905 					event, 0, type);
1906 	if (err < 0) {
1907 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1908 		WARN_ON(err == -EMSGSIZE);
1909 		kfree_skb(skb);
1910 		goto errout;
1911 	}
1912 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1913 	return;
1914 errout:
1915 	if (err < 0)
1916 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1917 }
1918 
1919 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1920 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1921 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1922 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1923 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1924 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1925 };
1926 
1927 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1928 				    struct nlmsghdr *nlh,
1929 				    struct netlink_ext_ack *extack)
1930 {
1931 	struct net *net = sock_net(in_skb->sk);
1932 	struct nlattr *tb[NETCONFA_MAX+1];
1933 	struct netconfmsg *ncm;
1934 	struct sk_buff *skb;
1935 	struct ipv4_devconf *devconf;
1936 	struct in_device *in_dev;
1937 	struct net_device *dev;
1938 	int ifindex;
1939 	int err;
1940 
1941 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1942 			  devconf_ipv4_policy, extack);
1943 	if (err < 0)
1944 		goto errout;
1945 
1946 	err = -EINVAL;
1947 	if (!tb[NETCONFA_IFINDEX])
1948 		goto errout;
1949 
1950 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1951 	switch (ifindex) {
1952 	case NETCONFA_IFINDEX_ALL:
1953 		devconf = net->ipv4.devconf_all;
1954 		break;
1955 	case NETCONFA_IFINDEX_DEFAULT:
1956 		devconf = net->ipv4.devconf_dflt;
1957 		break;
1958 	default:
1959 		dev = __dev_get_by_index(net, ifindex);
1960 		if (!dev)
1961 			goto errout;
1962 		in_dev = __in_dev_get_rtnl(dev);
1963 		if (!in_dev)
1964 			goto errout;
1965 		devconf = &in_dev->cnf;
1966 		break;
1967 	}
1968 
1969 	err = -ENOBUFS;
1970 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1971 	if (!skb)
1972 		goto errout;
1973 
1974 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1975 					NETLINK_CB(in_skb).portid,
1976 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1977 					NETCONFA_ALL);
1978 	if (err < 0) {
1979 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1980 		WARN_ON(err == -EMSGSIZE);
1981 		kfree_skb(skb);
1982 		goto errout;
1983 	}
1984 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1985 errout:
1986 	return err;
1987 }
1988 
1989 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1990 				     struct netlink_callback *cb)
1991 {
1992 	struct net *net = sock_net(skb->sk);
1993 	int h, s_h;
1994 	int idx, s_idx;
1995 	struct net_device *dev;
1996 	struct in_device *in_dev;
1997 	struct hlist_head *head;
1998 
1999 	s_h = cb->args[0];
2000 	s_idx = idx = cb->args[1];
2001 
2002 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2003 		idx = 0;
2004 		head = &net->dev_index_head[h];
2005 		rcu_read_lock();
2006 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2007 			  net->dev_base_seq;
2008 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2009 			if (idx < s_idx)
2010 				goto cont;
2011 			in_dev = __in_dev_get_rcu(dev);
2012 			if (!in_dev)
2013 				goto cont;
2014 
2015 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2016 						      &in_dev->cnf,
2017 						      NETLINK_CB(cb->skb).portid,
2018 						      cb->nlh->nlmsg_seq,
2019 						      RTM_NEWNETCONF,
2020 						      NLM_F_MULTI,
2021 						      NETCONFA_ALL) < 0) {
2022 				rcu_read_unlock();
2023 				goto done;
2024 			}
2025 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2026 cont:
2027 			idx++;
2028 		}
2029 		rcu_read_unlock();
2030 	}
2031 	if (h == NETDEV_HASHENTRIES) {
2032 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2033 					      net->ipv4.devconf_all,
2034 					      NETLINK_CB(cb->skb).portid,
2035 					      cb->nlh->nlmsg_seq,
2036 					      RTM_NEWNETCONF, NLM_F_MULTI,
2037 					      NETCONFA_ALL) < 0)
2038 			goto done;
2039 		else
2040 			h++;
2041 	}
2042 	if (h == NETDEV_HASHENTRIES + 1) {
2043 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2044 					      net->ipv4.devconf_dflt,
2045 					      NETLINK_CB(cb->skb).portid,
2046 					      cb->nlh->nlmsg_seq,
2047 					      RTM_NEWNETCONF, NLM_F_MULTI,
2048 					      NETCONFA_ALL) < 0)
2049 			goto done;
2050 		else
2051 			h++;
2052 	}
2053 done:
2054 	cb->args[0] = h;
2055 	cb->args[1] = idx;
2056 
2057 	return skb->len;
2058 }
2059 
2060 #ifdef CONFIG_SYSCTL
2061 
2062 static void devinet_copy_dflt_conf(struct net *net, int i)
2063 {
2064 	struct net_device *dev;
2065 
2066 	rcu_read_lock();
2067 	for_each_netdev_rcu(net, dev) {
2068 		struct in_device *in_dev;
2069 
2070 		in_dev = __in_dev_get_rcu(dev);
2071 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2072 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2073 	}
2074 	rcu_read_unlock();
2075 }
2076 
2077 /* called with RTNL locked */
2078 static void inet_forward_change(struct net *net)
2079 {
2080 	struct net_device *dev;
2081 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2082 
2083 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2084 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2085 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2086 				    NETCONFA_FORWARDING,
2087 				    NETCONFA_IFINDEX_ALL,
2088 				    net->ipv4.devconf_all);
2089 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2090 				    NETCONFA_FORWARDING,
2091 				    NETCONFA_IFINDEX_DEFAULT,
2092 				    net->ipv4.devconf_dflt);
2093 
2094 	for_each_netdev(net, dev) {
2095 		struct in_device *in_dev;
2096 
2097 		if (on)
2098 			dev_disable_lro(dev);
2099 
2100 		in_dev = __in_dev_get_rtnl(dev);
2101 		if (in_dev) {
2102 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2103 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2104 						    NETCONFA_FORWARDING,
2105 						    dev->ifindex, &in_dev->cnf);
2106 		}
2107 	}
2108 }
2109 
2110 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2111 {
2112 	if (cnf == net->ipv4.devconf_dflt)
2113 		return NETCONFA_IFINDEX_DEFAULT;
2114 	else if (cnf == net->ipv4.devconf_all)
2115 		return NETCONFA_IFINDEX_ALL;
2116 	else {
2117 		struct in_device *idev
2118 			= container_of(cnf, struct in_device, cnf);
2119 		return idev->dev->ifindex;
2120 	}
2121 }
2122 
2123 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2124 			     void __user *buffer,
2125 			     size_t *lenp, loff_t *ppos)
2126 {
2127 	int old_value = *(int *)ctl->data;
2128 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2129 	int new_value = *(int *)ctl->data;
2130 
2131 	if (write) {
2132 		struct ipv4_devconf *cnf = ctl->extra1;
2133 		struct net *net = ctl->extra2;
2134 		int i = (int *)ctl->data - cnf->data;
2135 		int ifindex;
2136 
2137 		set_bit(i, cnf->state);
2138 
2139 		if (cnf == net->ipv4.devconf_dflt)
2140 			devinet_copy_dflt_conf(net, i);
2141 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2142 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2143 			if ((new_value == 0) && (old_value != 0))
2144 				rt_cache_flush(net);
2145 
2146 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2147 		    new_value != old_value) {
2148 			ifindex = devinet_conf_ifindex(net, cnf);
2149 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2150 						    NETCONFA_RP_FILTER,
2151 						    ifindex, cnf);
2152 		}
2153 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2154 		    new_value != old_value) {
2155 			ifindex = devinet_conf_ifindex(net, cnf);
2156 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2157 						    NETCONFA_PROXY_NEIGH,
2158 						    ifindex, cnf);
2159 		}
2160 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2161 		    new_value != old_value) {
2162 			ifindex = devinet_conf_ifindex(net, cnf);
2163 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2164 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2165 						    ifindex, cnf);
2166 		}
2167 	}
2168 
2169 	return ret;
2170 }
2171 
2172 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2173 				  void __user *buffer,
2174 				  size_t *lenp, loff_t *ppos)
2175 {
2176 	int *valp = ctl->data;
2177 	int val = *valp;
2178 	loff_t pos = *ppos;
2179 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2180 
2181 	if (write && *valp != val) {
2182 		struct net *net = ctl->extra2;
2183 
2184 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2185 			if (!rtnl_trylock()) {
2186 				/* Restore the original values before restarting */
2187 				*valp = val;
2188 				*ppos = pos;
2189 				return restart_syscall();
2190 			}
2191 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2192 				inet_forward_change(net);
2193 			} else {
2194 				struct ipv4_devconf *cnf = ctl->extra1;
2195 				struct in_device *idev =
2196 					container_of(cnf, struct in_device, cnf);
2197 				if (*valp)
2198 					dev_disable_lro(idev->dev);
2199 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2200 							    NETCONFA_FORWARDING,
2201 							    idev->dev->ifindex,
2202 							    cnf);
2203 			}
2204 			rtnl_unlock();
2205 			rt_cache_flush(net);
2206 		} else
2207 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2208 						    NETCONFA_FORWARDING,
2209 						    NETCONFA_IFINDEX_DEFAULT,
2210 						    net->ipv4.devconf_dflt);
2211 	}
2212 
2213 	return ret;
2214 }
2215 
2216 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2217 				void __user *buffer,
2218 				size_t *lenp, loff_t *ppos)
2219 {
2220 	int *valp = ctl->data;
2221 	int val = *valp;
2222 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2223 	struct net *net = ctl->extra2;
2224 
2225 	if (write && *valp != val)
2226 		rt_cache_flush(net);
2227 
2228 	return ret;
2229 }
2230 
2231 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2232 	{ \
2233 		.procname	= name, \
2234 		.data		= ipv4_devconf.data + \
2235 				  IPV4_DEVCONF_ ## attr - 1, \
2236 		.maxlen		= sizeof(int), \
2237 		.mode		= mval, \
2238 		.proc_handler	= proc, \
2239 		.extra1		= &ipv4_devconf, \
2240 	}
2241 
2242 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2243 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2244 
2245 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2246 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2247 
2248 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2249 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2250 
2251 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2252 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2253 
2254 static struct devinet_sysctl_table {
2255 	struct ctl_table_header *sysctl_header;
2256 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2257 } devinet_sysctl = {
2258 	.devinet_vars = {
2259 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2260 					     devinet_sysctl_forward),
2261 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2262 
2263 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2264 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2265 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2266 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2267 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2268 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2269 					"accept_source_route"),
2270 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2271 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2272 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2273 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2274 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2275 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2276 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2277 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2278 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2279 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2280 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2281 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2282 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2283 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2284 					"force_igmp_version"),
2285 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2286 					"igmpv2_unsolicited_report_interval"),
2287 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2288 					"igmpv3_unsolicited_report_interval"),
2289 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2290 					"ignore_routes_with_linkdown"),
2291 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2292 					"drop_gratuitous_arp"),
2293 
2294 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2295 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2296 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2297 					      "promote_secondaries"),
2298 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2299 					      "route_localnet"),
2300 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2301 					      "drop_unicast_in_l2_multicast"),
2302 	},
2303 };
2304 
2305 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2306 				     int ifindex, struct ipv4_devconf *p)
2307 {
2308 	int i;
2309 	struct devinet_sysctl_table *t;
2310 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2311 
2312 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2313 	if (!t)
2314 		goto out;
2315 
2316 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2317 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2318 		t->devinet_vars[i].extra1 = p;
2319 		t->devinet_vars[i].extra2 = net;
2320 	}
2321 
2322 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2323 
2324 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2325 	if (!t->sysctl_header)
2326 		goto free;
2327 
2328 	p->sysctl = t;
2329 
2330 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2331 				    ifindex, p);
2332 	return 0;
2333 
2334 free:
2335 	kfree(t);
2336 out:
2337 	return -ENOBUFS;
2338 }
2339 
2340 static void __devinet_sysctl_unregister(struct net *net,
2341 					struct ipv4_devconf *cnf, int ifindex)
2342 {
2343 	struct devinet_sysctl_table *t = cnf->sysctl;
2344 
2345 	if (t) {
2346 		cnf->sysctl = NULL;
2347 		unregister_net_sysctl_table(t->sysctl_header);
2348 		kfree(t);
2349 	}
2350 
2351 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2352 }
2353 
2354 static int devinet_sysctl_register(struct in_device *idev)
2355 {
2356 	int err;
2357 
2358 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2359 		return -EINVAL;
2360 
2361 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2362 	if (err)
2363 		return err;
2364 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2365 					idev->dev->ifindex, &idev->cnf);
2366 	if (err)
2367 		neigh_sysctl_unregister(idev->arp_parms);
2368 	return err;
2369 }
2370 
2371 static void devinet_sysctl_unregister(struct in_device *idev)
2372 {
2373 	struct net *net = dev_net(idev->dev);
2374 
2375 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2376 	neigh_sysctl_unregister(idev->arp_parms);
2377 }
2378 
2379 static struct ctl_table ctl_forward_entry[] = {
2380 	{
2381 		.procname	= "ip_forward",
2382 		.data		= &ipv4_devconf.data[
2383 					IPV4_DEVCONF_FORWARDING - 1],
2384 		.maxlen		= sizeof(int),
2385 		.mode		= 0644,
2386 		.proc_handler	= devinet_sysctl_forward,
2387 		.extra1		= &ipv4_devconf,
2388 		.extra2		= &init_net,
2389 	},
2390 	{ },
2391 };
2392 #endif
2393 
2394 static __net_init int devinet_init_net(struct net *net)
2395 {
2396 	int err;
2397 	struct ipv4_devconf *all, *dflt;
2398 #ifdef CONFIG_SYSCTL
2399 	struct ctl_table *tbl = ctl_forward_entry;
2400 	struct ctl_table_header *forw_hdr;
2401 #endif
2402 
2403 	err = -ENOMEM;
2404 	all = &ipv4_devconf;
2405 	dflt = &ipv4_devconf_dflt;
2406 
2407 	if (!net_eq(net, &init_net)) {
2408 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2409 		if (!all)
2410 			goto err_alloc_all;
2411 
2412 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2413 		if (!dflt)
2414 			goto err_alloc_dflt;
2415 
2416 #ifdef CONFIG_SYSCTL
2417 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2418 		if (!tbl)
2419 			goto err_alloc_ctl;
2420 
2421 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2422 		tbl[0].extra1 = all;
2423 		tbl[0].extra2 = net;
2424 #endif
2425 	}
2426 
2427 #ifdef CONFIG_SYSCTL
2428 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2429 	if (err < 0)
2430 		goto err_reg_all;
2431 
2432 	err = __devinet_sysctl_register(net, "default",
2433 					NETCONFA_IFINDEX_DEFAULT, dflt);
2434 	if (err < 0)
2435 		goto err_reg_dflt;
2436 
2437 	err = -ENOMEM;
2438 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2439 	if (!forw_hdr)
2440 		goto err_reg_ctl;
2441 	net->ipv4.forw_hdr = forw_hdr;
2442 #endif
2443 
2444 	net->ipv4.devconf_all = all;
2445 	net->ipv4.devconf_dflt = dflt;
2446 	return 0;
2447 
2448 #ifdef CONFIG_SYSCTL
2449 err_reg_ctl:
2450 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2451 err_reg_dflt:
2452 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2453 err_reg_all:
2454 	if (tbl != ctl_forward_entry)
2455 		kfree(tbl);
2456 err_alloc_ctl:
2457 #endif
2458 	if (dflt != &ipv4_devconf_dflt)
2459 		kfree(dflt);
2460 err_alloc_dflt:
2461 	if (all != &ipv4_devconf)
2462 		kfree(all);
2463 err_alloc_all:
2464 	return err;
2465 }
2466 
2467 static __net_exit void devinet_exit_net(struct net *net)
2468 {
2469 #ifdef CONFIG_SYSCTL
2470 	struct ctl_table *tbl;
2471 
2472 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2473 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2474 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2475 				    NETCONFA_IFINDEX_DEFAULT);
2476 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2477 				    NETCONFA_IFINDEX_ALL);
2478 	kfree(tbl);
2479 #endif
2480 	kfree(net->ipv4.devconf_dflt);
2481 	kfree(net->ipv4.devconf_all);
2482 }
2483 
2484 static __net_initdata struct pernet_operations devinet_ops = {
2485 	.init = devinet_init_net,
2486 	.exit = devinet_exit_net,
2487 };
2488 
2489 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2490 	.family		  = AF_INET,
2491 	.fill_link_af	  = inet_fill_link_af,
2492 	.get_link_af_size = inet_get_link_af_size,
2493 	.validate_link_af = inet_validate_link_af,
2494 	.set_link_af	  = inet_set_link_af,
2495 };
2496 
2497 void __init devinet_init(void)
2498 {
2499 	int i;
2500 
2501 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2502 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2503 
2504 	register_pernet_subsys(&devinet_ops);
2505 
2506 	register_gifconf(PF_INET, inet_gifconf);
2507 	register_netdevice_notifier(&ip_netdev_notifier);
2508 
2509 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2510 
2511 	rtnl_af_register(&inet_af_ops);
2512 
2513 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2514 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2515 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2516 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2517 		      inet_netconf_dump_devconf, 0);
2518 }
2519