xref: /openbmc/linux/net/ipv4/devinet.c (revision 51f6b410)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
/*
 * Access configuration slot @attr in the default devconf table of network
 * namespace @net.  @net is parenthesized so that any pointer-valued
 * expression may be passed; @attr is the bare suffix handed to
 * IPV4_DEVCONF() and therefore cannot be parenthesized.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
103 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
104 };
105 
106 struct inet_fill_args {
107 	u32 portid;
108 	u32 seq;
109 	int event;
110 	unsigned int flags;
111 	int netnsid;
112 };
113 
114 #define IN4_ADDR_HSIZE_SHIFT	8
115 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
116 
117 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
118 
119 static u32 inet_addr_hash(const struct net *net, __be32 addr)
120 {
121 	u32 val = (__force u32) addr ^ net_hash_mix(net);
122 
123 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
124 }
125 
126 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
127 {
128 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
129 
130 	ASSERT_RTNL();
131 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
132 }
133 
134 static void inet_hash_remove(struct in_ifaddr *ifa)
135 {
136 	ASSERT_RTNL();
137 	hlist_del_init_rcu(&ifa->hash);
138 }
139 
140 /**
141  * __ip_dev_find - find the first device with a given source address.
142  * @net: the net namespace
143  * @addr: the source address
144  * @devref: if true, take a reference on the found device
145  *
146  * If a caller uses devref=false, it should be protected by RCU, or RTNL
147  */
148 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
149 {
150 	struct net_device *result = NULL;
151 	struct in_ifaddr *ifa;
152 
153 	rcu_read_lock();
154 	ifa = inet_lookup_ifaddr_rcu(net, addr);
155 	if (!ifa) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fallback to FIB local table so that communication
161 		 * over loopback subnets work.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	} else {
169 		result = ifa->ifa_dev->dev;
170 	}
171 	if (result && devref)
172 		dev_hold(result);
173 	rcu_read_unlock();
174 	return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
177 
178 /* called under RCU lock */
179 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
180 {
181 	u32 hash = inet_addr_hash(net, addr);
182 	struct in_ifaddr *ifa;
183 
184 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
185 		if (ifa->ifa_local == addr &&
186 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
187 			return ifa;
188 
189 	return NULL;
190 }
191 
192 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
193 
194 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
196 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
197 			 int destroy);
198 #ifdef CONFIG_SYSCTL
199 static int devinet_sysctl_register(struct in_device *idev);
200 static void devinet_sysctl_unregister(struct in_device *idev);
201 #else
202 static int devinet_sysctl_register(struct in_device *idev)
203 {
204 	return 0;
205 }
206 static void devinet_sysctl_unregister(struct in_device *idev)
207 {
208 }
209 #endif
210 
211 /* Locks all the inet devices. */
212 
213 static struct in_ifaddr *inet_alloc_ifa(void)
214 {
215 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
216 }
217 
218 static void inet_rcu_free_ifa(struct rcu_head *head)
219 {
220 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
221 	if (ifa->ifa_dev)
222 		in_dev_put(ifa->ifa_dev);
223 	kfree(ifa);
224 }
225 
226 static void inet_free_ifa(struct in_ifaddr *ifa)
227 {
228 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
229 }
230 
231 void in_dev_finish_destroy(struct in_device *idev)
232 {
233 	struct net_device *dev = idev->dev;
234 
235 	WARN_ON(idev->ifa_list);
236 	WARN_ON(idev->mc_list);
237 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
238 #ifdef NET_REFCNT_DEBUG
239 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
240 #endif
241 	dev_put(dev);
242 	if (!idev->dead)
243 		pr_err("Freeing alive in_device %p\n", idev);
244 	else
245 		kfree(idev);
246 }
247 EXPORT_SYMBOL(in_dev_finish_destroy);
248 
249 static struct in_device *inetdev_init(struct net_device *dev)
250 {
251 	struct in_device *in_dev;
252 	int err = -ENOMEM;
253 
254 	ASSERT_RTNL();
255 
256 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
257 	if (!in_dev)
258 		goto out;
259 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
260 			sizeof(in_dev->cnf));
261 	in_dev->cnf.sysctl = NULL;
262 	in_dev->dev = dev;
263 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
264 	if (!in_dev->arp_parms)
265 		goto out_kfree;
266 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
267 		dev_disable_lro(dev);
268 	/* Reference in_dev->dev */
269 	dev_hold(dev);
270 	/* Account for reference dev->ip_ptr (below) */
271 	refcount_set(&in_dev->refcnt, 1);
272 
273 	err = devinet_sysctl_register(in_dev);
274 	if (err) {
275 		in_dev->dead = 1;
276 		in_dev_put(in_dev);
277 		in_dev = NULL;
278 		goto out;
279 	}
280 	ip_mc_init_dev(in_dev);
281 	if (dev->flags & IFF_UP)
282 		ip_mc_up(in_dev);
283 
284 	/* we can receive as soon as ip_ptr is set -- do this last */
285 	rcu_assign_pointer(dev->ip_ptr, in_dev);
286 out:
287 	return in_dev ?: ERR_PTR(err);
288 out_kfree:
289 	kfree(in_dev);
290 	in_dev = NULL;
291 	goto out;
292 }
293 
294 static void in_dev_rcu_put(struct rcu_head *head)
295 {
296 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
297 	in_dev_put(idev);
298 }
299 
300 static void inetdev_destroy(struct in_device *in_dev)
301 {
302 	struct in_ifaddr *ifa;
303 	struct net_device *dev;
304 
305 	ASSERT_RTNL();
306 
307 	dev = in_dev->dev;
308 
309 	in_dev->dead = 1;
310 
311 	ip_mc_destroy_dev(in_dev);
312 
313 	while ((ifa = in_dev->ifa_list) != NULL) {
314 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
315 		inet_free_ifa(ifa);
316 	}
317 
318 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
319 
320 	devinet_sysctl_unregister(in_dev);
321 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
322 	arp_ifdown(dev);
323 
324 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
325 }
326 
327 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
328 {
329 	rcu_read_lock();
330 	for_primary_ifa(in_dev) {
331 		if (inet_ifa_match(a, ifa)) {
332 			if (!b || inet_ifa_match(b, ifa)) {
333 				rcu_read_unlock();
334 				return 1;
335 			}
336 		}
337 	} endfor_ifa(in_dev);
338 	rcu_read_unlock();
339 	return 0;
340 }
341 
342 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
343 			 int destroy, struct nlmsghdr *nlh, u32 portid)
344 {
345 	struct in_ifaddr *promote = NULL;
346 	struct in_ifaddr *ifa, *ifa1 = *ifap;
347 	struct in_ifaddr *last_prim = in_dev->ifa_list;
348 	struct in_ifaddr *prev_prom = NULL;
349 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
350 
351 	ASSERT_RTNL();
352 
353 	if (in_dev->dead)
354 		goto no_promotions;
355 
356 	/* 1. Deleting primary ifaddr forces deletion all secondaries
357 	 * unless alias promotion is set
358 	 **/
359 
360 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
361 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
362 
363 		while ((ifa = *ifap1) != NULL) {
364 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
365 			    ifa1->ifa_scope <= ifa->ifa_scope)
366 				last_prim = ifa;
367 
368 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
369 			    ifa1->ifa_mask != ifa->ifa_mask ||
370 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
371 				ifap1 = &ifa->ifa_next;
372 				prev_prom = ifa;
373 				continue;
374 			}
375 
376 			if (!do_promote) {
377 				inet_hash_remove(ifa);
378 				*ifap1 = ifa->ifa_next;
379 
380 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
381 				blocking_notifier_call_chain(&inetaddr_chain,
382 						NETDEV_DOWN, ifa);
383 				inet_free_ifa(ifa);
384 			} else {
385 				promote = ifa;
386 				break;
387 			}
388 		}
389 	}
390 
391 	/* On promotion all secondaries from subnet are changing
392 	 * the primary IP, we must remove all their routes silently
393 	 * and later to add them back with new prefsrc. Do this
394 	 * while all addresses are on the device list.
395 	 */
396 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
397 		if (ifa1->ifa_mask == ifa->ifa_mask &&
398 		    inet_ifa_match(ifa1->ifa_address, ifa))
399 			fib_del_ifaddr(ifa, ifa1);
400 	}
401 
402 no_promotions:
403 	/* 2. Unlink it */
404 
405 	*ifap = ifa1->ifa_next;
406 	inet_hash_remove(ifa1);
407 
408 	/* 3. Announce address deletion */
409 
410 	/* Send message first, then call notifier.
411 	   At first sight, FIB update triggered by notifier
412 	   will refer to already deleted ifaddr, that could confuse
413 	   netlink listeners. It is not true: look, gated sees
414 	   that route deleted and if it still thinks that ifaddr
415 	   is valid, it will try to restore deleted routes... Grr.
416 	   So that, this order is correct.
417 	 */
418 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
419 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
420 
421 	if (promote) {
422 		struct in_ifaddr *next_sec = promote->ifa_next;
423 
424 		if (prev_prom) {
425 			prev_prom->ifa_next = promote->ifa_next;
426 			promote->ifa_next = last_prim->ifa_next;
427 			last_prim->ifa_next = promote;
428 		}
429 
430 		promote->ifa_flags &= ~IFA_F_SECONDARY;
431 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
432 		blocking_notifier_call_chain(&inetaddr_chain,
433 				NETDEV_UP, promote);
434 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
435 			if (ifa1->ifa_mask != ifa->ifa_mask ||
436 			    !inet_ifa_match(ifa1->ifa_address, ifa))
437 					continue;
438 			fib_add_ifaddr(ifa);
439 		}
440 
441 	}
442 	if (destroy)
443 		inet_free_ifa(ifa1);
444 }
445 
446 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
447 			 int destroy)
448 {
449 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
450 }
451 
452 static void check_lifetime(struct work_struct *work);
453 
454 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
455 
456 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
457 			     u32 portid, struct netlink_ext_ack *extack)
458 {
459 	struct in_device *in_dev = ifa->ifa_dev;
460 	struct in_ifaddr *ifa1, **ifap, **last_primary;
461 	struct in_validator_info ivi;
462 	int ret;
463 
464 	ASSERT_RTNL();
465 
466 	if (!ifa->ifa_local) {
467 		inet_free_ifa(ifa);
468 		return 0;
469 	}
470 
471 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
472 	last_primary = &in_dev->ifa_list;
473 
474 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
475 	     ifap = &ifa1->ifa_next) {
476 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
477 		    ifa->ifa_scope <= ifa1->ifa_scope)
478 			last_primary = &ifa1->ifa_next;
479 		if (ifa1->ifa_mask == ifa->ifa_mask &&
480 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
481 			if (ifa1->ifa_local == ifa->ifa_local) {
482 				inet_free_ifa(ifa);
483 				return -EEXIST;
484 			}
485 			if (ifa1->ifa_scope != ifa->ifa_scope) {
486 				inet_free_ifa(ifa);
487 				return -EINVAL;
488 			}
489 			ifa->ifa_flags |= IFA_F_SECONDARY;
490 		}
491 	}
492 
493 	/* Allow any devices that wish to register ifaddr validtors to weigh
494 	 * in now, before changes are committed.  The rntl lock is serializing
495 	 * access here, so the state should not change between a validator call
496 	 * and a final notify on commit.  This isn't invoked on promotion under
497 	 * the assumption that validators are checking the address itself, and
498 	 * not the flags.
499 	 */
500 	ivi.ivi_addr = ifa->ifa_address;
501 	ivi.ivi_dev = ifa->ifa_dev;
502 	ivi.extack = extack;
503 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
504 					   NETDEV_UP, &ivi);
505 	ret = notifier_to_errno(ret);
506 	if (ret) {
507 		inet_free_ifa(ifa);
508 		return ret;
509 	}
510 
511 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
512 		prandom_seed((__force u32) ifa->ifa_local);
513 		ifap = last_primary;
514 	}
515 
516 	ifa->ifa_next = *ifap;
517 	*ifap = ifa;
518 
519 	inet_hash_insert(dev_net(in_dev->dev), ifa);
520 
521 	cancel_delayed_work(&check_lifetime_work);
522 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
523 
524 	/* Send message first, then call notifier.
525 	   Notifier will trigger FIB update, so that
526 	   listeners of netlink will know about new ifaddr */
527 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
528 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
529 
530 	return 0;
531 }
532 
533 static int inet_insert_ifa(struct in_ifaddr *ifa)
534 {
535 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
536 }
537 
538 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
539 {
540 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
541 
542 	ASSERT_RTNL();
543 
544 	if (!in_dev) {
545 		inet_free_ifa(ifa);
546 		return -ENOBUFS;
547 	}
548 	ipv4_devconf_setall(in_dev);
549 	neigh_parms_data_state_setall(in_dev->arp_parms);
550 	if (ifa->ifa_dev != in_dev) {
551 		WARN_ON(ifa->ifa_dev);
552 		in_dev_hold(in_dev);
553 		ifa->ifa_dev = in_dev;
554 	}
555 	if (ipv4_is_loopback(ifa->ifa_local))
556 		ifa->ifa_scope = RT_SCOPE_HOST;
557 	return inet_insert_ifa(ifa);
558 }
559 
560 /* Caller must hold RCU or RTNL :
561  * We dont take a reference on found in_device
562  */
563 struct in_device *inetdev_by_index(struct net *net, int ifindex)
564 {
565 	struct net_device *dev;
566 	struct in_device *in_dev = NULL;
567 
568 	rcu_read_lock();
569 	dev = dev_get_by_index_rcu(net, ifindex);
570 	if (dev)
571 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
572 	rcu_read_unlock();
573 	return in_dev;
574 }
575 EXPORT_SYMBOL(inetdev_by_index);
576 
577 /* Called only from RTNL semaphored context. No locks. */
578 
579 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
580 				    __be32 mask)
581 {
582 	ASSERT_RTNL();
583 
584 	for_primary_ifa(in_dev) {
585 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
586 			return ifa;
587 	} endfor_ifa(in_dev);
588 	return NULL;
589 }
590 
591 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
592 {
593 	struct ip_mreqn mreq = {
594 		.imr_multiaddr.s_addr = ifa->ifa_address,
595 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
596 	};
597 	int ret;
598 
599 	ASSERT_RTNL();
600 
601 	lock_sock(sk);
602 	if (join)
603 		ret = ip_mc_join_group(sk, &mreq);
604 	else
605 		ret = ip_mc_leave_group(sk, &mreq);
606 	release_sock(sk);
607 
608 	return ret;
609 }
610 
611 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
612 			    struct netlink_ext_ack *extack)
613 {
614 	struct net *net = sock_net(skb->sk);
615 	struct nlattr *tb[IFA_MAX+1];
616 	struct in_device *in_dev;
617 	struct ifaddrmsg *ifm;
618 	struct in_ifaddr *ifa, **ifap;
619 	int err = -EINVAL;
620 
621 	ASSERT_RTNL();
622 
623 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
624 			  extack);
625 	if (err < 0)
626 		goto errout;
627 
628 	ifm = nlmsg_data(nlh);
629 	in_dev = inetdev_by_index(net, ifm->ifa_index);
630 	if (!in_dev) {
631 		err = -ENODEV;
632 		goto errout;
633 	}
634 
635 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
636 	     ifap = &ifa->ifa_next) {
637 		if (tb[IFA_LOCAL] &&
638 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
639 			continue;
640 
641 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
642 			continue;
643 
644 		if (tb[IFA_ADDRESS] &&
645 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
646 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
647 			continue;
648 
649 		if (ipv4_is_multicast(ifa->ifa_address))
650 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
651 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
652 		return 0;
653 	}
654 
655 	err = -EADDRNOTAVAIL;
656 errout:
657 	return err;
658 }
659 
660 #define INFINITY_LIFE_TIME	0xFFFFFFFF
661 
662 static void check_lifetime(struct work_struct *work)
663 {
664 	unsigned long now, next, next_sec, next_sched;
665 	struct in_ifaddr *ifa;
666 	struct hlist_node *n;
667 	int i;
668 
669 	now = jiffies;
670 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
671 
672 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
673 		bool change_needed = false;
674 
675 		rcu_read_lock();
676 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
677 			unsigned long age;
678 
679 			if (ifa->ifa_flags & IFA_F_PERMANENT)
680 				continue;
681 
682 			/* We try to batch several events at once. */
683 			age = (now - ifa->ifa_tstamp +
684 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
685 
686 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
687 			    age >= ifa->ifa_valid_lft) {
688 				change_needed = true;
689 			} else if (ifa->ifa_preferred_lft ==
690 				   INFINITY_LIFE_TIME) {
691 				continue;
692 			} else if (age >= ifa->ifa_preferred_lft) {
693 				if (time_before(ifa->ifa_tstamp +
694 						ifa->ifa_valid_lft * HZ, next))
695 					next = ifa->ifa_tstamp +
696 					       ifa->ifa_valid_lft * HZ;
697 
698 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
699 					change_needed = true;
700 			} else if (time_before(ifa->ifa_tstamp +
701 					       ifa->ifa_preferred_lft * HZ,
702 					       next)) {
703 				next = ifa->ifa_tstamp +
704 				       ifa->ifa_preferred_lft * HZ;
705 			}
706 		}
707 		rcu_read_unlock();
708 		if (!change_needed)
709 			continue;
710 		rtnl_lock();
711 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
712 			unsigned long age;
713 
714 			if (ifa->ifa_flags & IFA_F_PERMANENT)
715 				continue;
716 
717 			/* We try to batch several events at once. */
718 			age = (now - ifa->ifa_tstamp +
719 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
720 
721 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722 			    age >= ifa->ifa_valid_lft) {
723 				struct in_ifaddr **ifap;
724 
725 				for (ifap = &ifa->ifa_dev->ifa_list;
726 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
727 					if (*ifap == ifa) {
728 						inet_del_ifa(ifa->ifa_dev,
729 							     ifap, 1);
730 						break;
731 					}
732 				}
733 			} else if (ifa->ifa_preferred_lft !=
734 				   INFINITY_LIFE_TIME &&
735 				   age >= ifa->ifa_preferred_lft &&
736 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
737 				ifa->ifa_flags |= IFA_F_DEPRECATED;
738 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
739 			}
740 		}
741 		rtnl_unlock();
742 	}
743 
744 	next_sec = round_jiffies_up(next);
745 	next_sched = next;
746 
747 	/* If rounded timeout is accurate enough, accept it. */
748 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
749 		next_sched = next_sec;
750 
751 	now = jiffies;
752 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
753 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
754 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
755 
756 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
757 			next_sched - now);
758 }
759 
760 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
761 			     __u32 prefered_lft)
762 {
763 	unsigned long timeout;
764 
765 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
766 
767 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
768 	if (addrconf_finite_timeout(timeout))
769 		ifa->ifa_valid_lft = timeout;
770 	else
771 		ifa->ifa_flags |= IFA_F_PERMANENT;
772 
773 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
774 	if (addrconf_finite_timeout(timeout)) {
775 		if (timeout == 0)
776 			ifa->ifa_flags |= IFA_F_DEPRECATED;
777 		ifa->ifa_preferred_lft = timeout;
778 	}
779 	ifa->ifa_tstamp = jiffies;
780 	if (!ifa->ifa_cstamp)
781 		ifa->ifa_cstamp = ifa->ifa_tstamp;
782 }
783 
784 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
785 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
786 {
787 	struct nlattr *tb[IFA_MAX+1];
788 	struct in_ifaddr *ifa;
789 	struct ifaddrmsg *ifm;
790 	struct net_device *dev;
791 	struct in_device *in_dev;
792 	int err;
793 
794 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
795 			  NULL);
796 	if (err < 0)
797 		goto errout;
798 
799 	ifm = nlmsg_data(nlh);
800 	err = -EINVAL;
801 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
802 		goto errout;
803 
804 	dev = __dev_get_by_index(net, ifm->ifa_index);
805 	err = -ENODEV;
806 	if (!dev)
807 		goto errout;
808 
809 	in_dev = __in_dev_get_rtnl(dev);
810 	err = -ENOBUFS;
811 	if (!in_dev)
812 		goto errout;
813 
814 	ifa = inet_alloc_ifa();
815 	if (!ifa)
816 		/*
817 		 * A potential indev allocation can be left alive, it stays
818 		 * assigned to its device and is destroy with it.
819 		 */
820 		goto errout;
821 
822 	ipv4_devconf_setall(in_dev);
823 	neigh_parms_data_state_setall(in_dev->arp_parms);
824 	in_dev_hold(in_dev);
825 
826 	if (!tb[IFA_ADDRESS])
827 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
828 
829 	INIT_HLIST_NODE(&ifa->hash);
830 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
831 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
832 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
833 					 ifm->ifa_flags;
834 	ifa->ifa_scope = ifm->ifa_scope;
835 	ifa->ifa_dev = in_dev;
836 
837 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
838 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
839 
840 	if (tb[IFA_BROADCAST])
841 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
842 
843 	if (tb[IFA_LABEL])
844 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
845 	else
846 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
847 
848 	if (tb[IFA_RT_PRIORITY])
849 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
850 
851 	if (tb[IFA_CACHEINFO]) {
852 		struct ifa_cacheinfo *ci;
853 
854 		ci = nla_data(tb[IFA_CACHEINFO]);
855 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
856 			err = -EINVAL;
857 			goto errout_free;
858 		}
859 		*pvalid_lft = ci->ifa_valid;
860 		*pprefered_lft = ci->ifa_prefered;
861 	}
862 
863 	return ifa;
864 
865 errout_free:
866 	inet_free_ifa(ifa);
867 errout:
868 	return ERR_PTR(err);
869 }
870 
871 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
872 {
873 	struct in_device *in_dev = ifa->ifa_dev;
874 	struct in_ifaddr *ifa1, **ifap;
875 
876 	if (!ifa->ifa_local)
877 		return NULL;
878 
879 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
880 	     ifap = &ifa1->ifa_next) {
881 		if (ifa1->ifa_mask == ifa->ifa_mask &&
882 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
883 		    ifa1->ifa_local == ifa->ifa_local)
884 			return ifa1;
885 	}
886 	return NULL;
887 }
888 
889 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
890 			    struct netlink_ext_ack *extack)
891 {
892 	struct net *net = sock_net(skb->sk);
893 	struct in_ifaddr *ifa;
894 	struct in_ifaddr *ifa_existing;
895 	__u32 valid_lft = INFINITY_LIFE_TIME;
896 	__u32 prefered_lft = INFINITY_LIFE_TIME;
897 
898 	ASSERT_RTNL();
899 
900 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
901 	if (IS_ERR(ifa))
902 		return PTR_ERR(ifa);
903 
904 	ifa_existing = find_matching_ifa(ifa);
905 	if (!ifa_existing) {
906 		/* It would be best to check for !NLM_F_CREATE here but
907 		 * userspace already relies on not having to provide this.
908 		 */
909 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
910 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
911 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
912 					       true, ifa);
913 
914 			if (ret < 0) {
915 				inet_free_ifa(ifa);
916 				return ret;
917 			}
918 		}
919 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
920 					 extack);
921 	} else {
922 		u32 new_metric = ifa->ifa_rt_priority;
923 
924 		inet_free_ifa(ifa);
925 
926 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
927 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
928 			return -EEXIST;
929 		ifa = ifa_existing;
930 
931 		if (ifa->ifa_rt_priority != new_metric) {
932 			fib_modify_prefix_metric(ifa, new_metric);
933 			ifa->ifa_rt_priority = new_metric;
934 		}
935 
936 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
937 		cancel_delayed_work(&check_lifetime_work);
938 		queue_delayed_work(system_power_efficient_wq,
939 				&check_lifetime_work, 0);
940 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
941 	}
942 	return 0;
943 }
944 
945 /*
946  *	Determine a default network mask, based on the IP address.
947  */
948 
949 static int inet_abc_len(__be32 addr)
950 {
951 	int rc = -1;	/* Something else, probably a multicast. */
952 
953 	if (ipv4_is_zeronet(addr))
954 		rc = 0;
955 	else {
956 		__u32 haddr = ntohl(addr);
957 
958 		if (IN_CLASSA(haddr))
959 			rc = 8;
960 		else if (IN_CLASSB(haddr))
961 			rc = 16;
962 		else if (IN_CLASSC(haddr))
963 			rc = 24;
964 	}
965 
966 	return rc;
967 }
968 
969 
970 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
971 {
972 	struct sockaddr_in sin_orig;
973 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
974 	struct in_device *in_dev;
975 	struct in_ifaddr **ifap = NULL;
976 	struct in_ifaddr *ifa = NULL;
977 	struct net_device *dev;
978 	char *colon;
979 	int ret = -EFAULT;
980 	int tryaddrmatch = 0;
981 
982 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
983 
984 	/* save original address for comparison */
985 	memcpy(&sin_orig, sin, sizeof(*sin));
986 
987 	colon = strchr(ifr->ifr_name, ':');
988 	if (colon)
989 		*colon = 0;
990 
991 	dev_load(net, ifr->ifr_name);
992 
993 	switch (cmd) {
994 	case SIOCGIFADDR:	/* Get interface address */
995 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
996 	case SIOCGIFDSTADDR:	/* Get the destination address */
997 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
998 		/* Note that these ioctls will not sleep,
999 		   so that we do not impose a lock.
1000 		   One day we will be forced to put shlock here (I mean SMP)
1001 		 */
1002 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1003 		memset(sin, 0, sizeof(*sin));
1004 		sin->sin_family = AF_INET;
1005 		break;
1006 
1007 	case SIOCSIFFLAGS:
1008 		ret = -EPERM;
1009 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1010 			goto out;
1011 		break;
1012 	case SIOCSIFADDR:	/* Set interface address (and family) */
1013 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1014 	case SIOCSIFDSTADDR:	/* Set the destination address */
1015 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1016 		ret = -EPERM;
1017 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1018 			goto out;
1019 		ret = -EINVAL;
1020 		if (sin->sin_family != AF_INET)
1021 			goto out;
1022 		break;
1023 	default:
1024 		ret = -EINVAL;
1025 		goto out;
1026 	}
1027 
1028 	rtnl_lock();
1029 
1030 	ret = -ENODEV;
1031 	dev = __dev_get_by_name(net, ifr->ifr_name);
1032 	if (!dev)
1033 		goto done;
1034 
1035 	if (colon)
1036 		*colon = ':';
1037 
1038 	in_dev = __in_dev_get_rtnl(dev);
1039 	if (in_dev) {
1040 		if (tryaddrmatch) {
1041 			/* Matthias Andree */
1042 			/* compare label and address (4.4BSD style) */
1043 			/* note: we only do this for a limited set of ioctls
1044 			   and only if the original address family was AF_INET.
1045 			   This is checked above. */
1046 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1047 			     ifap = &ifa->ifa_next) {
1048 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1049 				    sin_orig.sin_addr.s_addr ==
1050 							ifa->ifa_local) {
1051 					break; /* found */
1052 				}
1053 			}
1054 		}
1055 		/* we didn't get a match, maybe the application is
1056 		   4.3BSD-style and passed in junk so we fall back to
1057 		   comparing just the label */
1058 		if (!ifa) {
1059 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1060 			     ifap = &ifa->ifa_next)
1061 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1062 					break;
1063 		}
1064 	}
1065 
1066 	ret = -EADDRNOTAVAIL;
1067 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1068 		goto done;
1069 
1070 	switch (cmd) {
1071 	case SIOCGIFADDR:	/* Get interface address */
1072 		ret = 0;
1073 		sin->sin_addr.s_addr = ifa->ifa_local;
1074 		break;
1075 
1076 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1077 		ret = 0;
1078 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1079 		break;
1080 
1081 	case SIOCGIFDSTADDR:	/* Get the destination address */
1082 		ret = 0;
1083 		sin->sin_addr.s_addr = ifa->ifa_address;
1084 		break;
1085 
1086 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1087 		ret = 0;
1088 		sin->sin_addr.s_addr = ifa->ifa_mask;
1089 		break;
1090 
1091 	case SIOCSIFFLAGS:
1092 		if (colon) {
1093 			ret = -EADDRNOTAVAIL;
1094 			if (!ifa)
1095 				break;
1096 			ret = 0;
1097 			if (!(ifr->ifr_flags & IFF_UP))
1098 				inet_del_ifa(in_dev, ifap, 1);
1099 			break;
1100 		}
1101 		ret = dev_change_flags(dev, ifr->ifr_flags);
1102 		break;
1103 
1104 	case SIOCSIFADDR:	/* Set interface address (and family) */
1105 		ret = -EINVAL;
1106 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1107 			break;
1108 
1109 		if (!ifa) {
1110 			ret = -ENOBUFS;
1111 			ifa = inet_alloc_ifa();
1112 			if (!ifa)
1113 				break;
1114 			INIT_HLIST_NODE(&ifa->hash);
1115 			if (colon)
1116 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1117 			else
1118 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1119 		} else {
1120 			ret = 0;
1121 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1122 				break;
1123 			inet_del_ifa(in_dev, ifap, 0);
1124 			ifa->ifa_broadcast = 0;
1125 			ifa->ifa_scope = 0;
1126 		}
1127 
1128 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1129 
1130 		if (!(dev->flags & IFF_POINTOPOINT)) {
1131 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1132 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1133 			if ((dev->flags & IFF_BROADCAST) &&
1134 			    ifa->ifa_prefixlen < 31)
1135 				ifa->ifa_broadcast = ifa->ifa_address |
1136 						     ~ifa->ifa_mask;
1137 		} else {
1138 			ifa->ifa_prefixlen = 32;
1139 			ifa->ifa_mask = inet_make_mask(32);
1140 		}
1141 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1142 		ret = inet_set_ifa(dev, ifa);
1143 		break;
1144 
1145 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1146 		ret = 0;
1147 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1148 			inet_del_ifa(in_dev, ifap, 0);
1149 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1150 			inet_insert_ifa(ifa);
1151 		}
1152 		break;
1153 
1154 	case SIOCSIFDSTADDR:	/* Set the destination address */
1155 		ret = 0;
1156 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1157 			break;
1158 		ret = -EINVAL;
1159 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1160 			break;
1161 		ret = 0;
1162 		inet_del_ifa(in_dev, ifap, 0);
1163 		ifa->ifa_address = sin->sin_addr.s_addr;
1164 		inet_insert_ifa(ifa);
1165 		break;
1166 
1167 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1168 
1169 		/*
1170 		 *	The mask we set must be legal.
1171 		 */
1172 		ret = -EINVAL;
1173 		if (bad_mask(sin->sin_addr.s_addr, 0))
1174 			break;
1175 		ret = 0;
1176 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1177 			__be32 old_mask = ifa->ifa_mask;
1178 			inet_del_ifa(in_dev, ifap, 0);
1179 			ifa->ifa_mask = sin->sin_addr.s_addr;
1180 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1181 
1182 			/* See if current broadcast address matches
1183 			 * with current netmask, then recalculate
1184 			 * the broadcast address. Otherwise it's a
1185 			 * funny address, so don't touch it since
1186 			 * the user seems to know what (s)he's doing...
1187 			 */
1188 			if ((dev->flags & IFF_BROADCAST) &&
1189 			    (ifa->ifa_prefixlen < 31) &&
1190 			    (ifa->ifa_broadcast ==
1191 			     (ifa->ifa_local|~old_mask))) {
1192 				ifa->ifa_broadcast = (ifa->ifa_local |
1193 						      ~sin->sin_addr.s_addr);
1194 			}
1195 			inet_insert_ifa(ifa);
1196 		}
1197 		break;
1198 	}
1199 done:
1200 	rtnl_unlock();
1201 out:
1202 	return ret;
1203 }
1204 
1205 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1206 {
1207 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1208 	struct in_ifaddr *ifa;
1209 	struct ifreq ifr;
1210 	int done = 0;
1211 
1212 	if (WARN_ON(size > sizeof(struct ifreq)))
1213 		goto out;
1214 
1215 	if (!in_dev)
1216 		goto out;
1217 
1218 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1219 		if (!buf) {
1220 			done += size;
1221 			continue;
1222 		}
1223 		if (len < size)
1224 			break;
1225 		memset(&ifr, 0, sizeof(struct ifreq));
1226 		strcpy(ifr.ifr_name, ifa->ifa_label);
1227 
1228 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1229 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1230 								ifa->ifa_local;
1231 
1232 		if (copy_to_user(buf + done, &ifr, size)) {
1233 			done = -EFAULT;
1234 			break;
1235 		}
1236 		len  -= size;
1237 		done += size;
1238 	}
1239 out:
1240 	return done;
1241 }
1242 
1243 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1244 				 int scope)
1245 {
1246 	for_primary_ifa(in_dev) {
1247 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1248 		    ifa->ifa_scope <= scope)
1249 			return ifa->ifa_local;
1250 	} endfor_ifa(in_dev);
1251 
1252 	return 0;
1253 }
1254 
1255 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1256 {
1257 	__be32 addr = 0;
1258 	struct in_device *in_dev;
1259 	struct net *net = dev_net(dev);
1260 	int master_idx;
1261 
1262 	rcu_read_lock();
1263 	in_dev = __in_dev_get_rcu(dev);
1264 	if (!in_dev)
1265 		goto no_in_dev;
1266 
1267 	for_primary_ifa(in_dev) {
1268 		if (ifa->ifa_scope > scope)
1269 			continue;
1270 		if (!dst || inet_ifa_match(dst, ifa)) {
1271 			addr = ifa->ifa_local;
1272 			break;
1273 		}
1274 		if (!addr)
1275 			addr = ifa->ifa_local;
1276 	} endfor_ifa(in_dev);
1277 
1278 	if (addr)
1279 		goto out_unlock;
1280 no_in_dev:
1281 	master_idx = l3mdev_master_ifindex_rcu(dev);
1282 
1283 	/* For VRFs, the VRF device takes the place of the loopback device,
1284 	 * with addresses on it being preferred.  Note in such cases the
1285 	 * loopback device will be among the devices that fail the master_idx
1286 	 * equality check in the loop below.
1287 	 */
1288 	if (master_idx &&
1289 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1290 	    (in_dev = __in_dev_get_rcu(dev))) {
1291 		addr = in_dev_select_addr(in_dev, scope);
1292 		if (addr)
1293 			goto out_unlock;
1294 	}
1295 
1296 	/* Not loopback addresses on loopback should be preferred
1297 	   in this case. It is important that lo is the first interface
1298 	   in dev_base list.
1299 	 */
1300 	for_each_netdev_rcu(net, dev) {
1301 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1302 			continue;
1303 
1304 		in_dev = __in_dev_get_rcu(dev);
1305 		if (!in_dev)
1306 			continue;
1307 
1308 		addr = in_dev_select_addr(in_dev, scope);
1309 		if (addr)
1310 			goto out_unlock;
1311 	}
1312 out_unlock:
1313 	rcu_read_unlock();
1314 	return addr;
1315 }
1316 EXPORT_SYMBOL(inet_select_addr);
1317 
/*
 * Look for a usable local address on a single in_device.
 *
 * Two pieces of state are tracked while walking every address (for_ifa):
 *   addr - candidate local address: the first entry whose ifa_local equals
 *          @local (or any entry when @local is 0) and whose scope is at
 *          most @scope;
 *   same - set once an entry's subnet matches @local (when non-zero) and
 *          @dst (when non-zero), as judged by inet_ifa_match().
 *
 * Returns the candidate only if a matching subnet was also seen (same is
 * set when the walk ends); otherwise returns 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* No candidate yet: take this entry if it satisfies the
		 * local-address and scope constraints.  If the subnet match
		 * was already established, we are done.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			/* Does this entry's subnet cover local and dst? */
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* A caller-specified local, or no dst
				 * constraint, needs no further checking.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1354 
1355 /*
1356  * Confirm that local IP address exists using wildcards:
1357  * - net: netns to check, cannot be NULL
1358  * - in_dev: only on this interface, NULL=any interface
1359  * - dst: only in the same subnet as dst, 0=any dst
1360  * - local: address, 0=autoselect the local address
1361  * - scope: maximum allowed scope value for the local address
1362  */
1363 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1364 			 __be32 dst, __be32 local, int scope)
1365 {
1366 	__be32 addr = 0;
1367 	struct net_device *dev;
1368 
1369 	if (in_dev)
1370 		return confirm_addr_indev(in_dev, dst, local, scope);
1371 
1372 	rcu_read_lock();
1373 	for_each_netdev_rcu(net, dev) {
1374 		in_dev = __in_dev_get_rcu(dev);
1375 		if (in_dev) {
1376 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1377 			if (addr)
1378 				break;
1379 		}
1380 	}
1381 	rcu_read_unlock();
1382 
1383 	return addr;
1384 }
1385 EXPORT_SYMBOL(inet_confirm_addr);
1386 
1387 /*
1388  *	Device notifier
1389  */
1390 
1391 int register_inetaddr_notifier(struct notifier_block *nb)
1392 {
1393 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1394 }
1395 EXPORT_SYMBOL(register_inetaddr_notifier);
1396 
1397 int unregister_inetaddr_notifier(struct notifier_block *nb)
1398 {
1399 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1400 }
1401 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1402 
1403 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1404 {
1405 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1406 }
1407 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1408 
1409 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1410 {
1411 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1412 	    nb);
1413 }
1414 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1415 
1416 /* Rename ifa_labels for a device name change. Make some effort to preserve
1417  * existing alias numbering and to create unique labels if possible.
1418 */
1419 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1420 {
1421 	struct in_ifaddr *ifa;
1422 	int named = 0;
1423 
1424 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1425 		char old[IFNAMSIZ], *dot;
1426 
1427 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1428 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1429 		if (named++ == 0)
1430 			goto skip;
1431 		dot = strchr(old, ':');
1432 		if (!dot) {
1433 			sprintf(old, ":%d", named);
1434 			dot = old;
1435 		}
1436 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1437 			strcat(ifa->ifa_label, dot);
1438 		else
1439 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1440 skip:
1441 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1442 	}
1443 }
1444 
1445 static bool inetdev_valid_mtu(unsigned int mtu)
1446 {
1447 	return mtu >= IPV4_MIN_MTU;
1448 }
1449 
1450 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1451 					struct in_device *in_dev)
1452 
1453 {
1454 	struct in_ifaddr *ifa;
1455 
1456 	for (ifa = in_dev->ifa_list; ifa;
1457 	     ifa = ifa->ifa_next) {
1458 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1459 			 ifa->ifa_local, dev,
1460 			 ifa->ifa_local, NULL,
1461 			 dev->dev_addr, NULL);
1462 	}
1463 }
1464 
/*
 * Netdevice notifier callback: keeps the per-device IPv4 state (in_device)
 * in step with device lifecycle events.
 *
 * Called only under RTNL semaphore (asserted below).
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER, in which case the error is passed back through
 * notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* Device has no IPv4 state yet: only two events can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			/* Loopback devices get NOXFRM and NOPOLICY set. */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			/* NOTE(review): the inetdev_init() result is not
			 * checked for IS_ERR() on this path.
			 */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Unexpected: in_dev already exists at registration time.
		 * Log it and clear the device's ip_ptr.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback coming up: install INADDR_LOOPBACK with an 8-bit
		 * prefix, host scope and infinite lifetimes.  Allocation
		 * failure is silently tolerated.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce when ARP_NOTIFY is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() in this file does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
		 * statement above looks stale - confirm and update.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries for this in_dev; presumably
		 * they are keyed by device name - verify.
		 */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1560 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1564 
1565 static size_t inet_nlmsg_size(void)
1566 {
1567 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1568 	       + nla_total_size(4) /* IFA_ADDRESS */
1569 	       + nla_total_size(4) /* IFA_LOCAL */
1570 	       + nla_total_size(4) /* IFA_BROADCAST */
1571 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1572 	       + nla_total_size(4)  /* IFA_FLAGS */
1573 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1574 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1575 }
1576 
1577 static inline u32 cstamp_delta(unsigned long cstamp)
1578 {
1579 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1580 }
1581 
1582 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1583 			 unsigned long tstamp, u32 preferred, u32 valid)
1584 {
1585 	struct ifa_cacheinfo ci;
1586 
1587 	ci.cstamp = cstamp_delta(cstamp);
1588 	ci.tstamp = cstamp_delta(tstamp);
1589 	ci.ifa_prefered = preferred;
1590 	ci.ifa_valid = valid;
1591 
1592 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1593 }
1594 
1595 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1596 			    struct inet_fill_args *args)
1597 {
1598 	struct ifaddrmsg *ifm;
1599 	struct nlmsghdr  *nlh;
1600 	u32 preferred, valid;
1601 
1602 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1603 			args->flags);
1604 	if (!nlh)
1605 		return -EMSGSIZE;
1606 
1607 	ifm = nlmsg_data(nlh);
1608 	ifm->ifa_family = AF_INET;
1609 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1610 	ifm->ifa_flags = ifa->ifa_flags;
1611 	ifm->ifa_scope = ifa->ifa_scope;
1612 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1613 
1614 	if (args->netnsid >= 0 &&
1615 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1616 		goto nla_put_failure;
1617 
1618 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1619 		preferred = ifa->ifa_preferred_lft;
1620 		valid = ifa->ifa_valid_lft;
1621 		if (preferred != INFINITY_LIFE_TIME) {
1622 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1623 
1624 			if (preferred > tval)
1625 				preferred -= tval;
1626 			else
1627 				preferred = 0;
1628 			if (valid != INFINITY_LIFE_TIME) {
1629 				if (valid > tval)
1630 					valid -= tval;
1631 				else
1632 					valid = 0;
1633 			}
1634 		}
1635 	} else {
1636 		preferred = INFINITY_LIFE_TIME;
1637 		valid = INFINITY_LIFE_TIME;
1638 	}
1639 	if ((ifa->ifa_address &&
1640 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1641 	    (ifa->ifa_local &&
1642 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1643 	    (ifa->ifa_broadcast &&
1644 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1645 	    (ifa->ifa_label[0] &&
1646 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1647 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1648 	    (ifa->ifa_rt_priority &&
1649 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1650 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1651 			  preferred, valid))
1652 		goto nla_put_failure;
1653 
1654 	nlmsg_end(skb, nlh);
1655 	return 0;
1656 
1657 nla_put_failure:
1658 	nlmsg_cancel(skb, nlh);
1659 	return -EMSGSIZE;
1660 }
1661 
/*
 * Netlink dump callback: emit one RTM_NEWADDR message per IPv4 address.
 *
 * The dump is resumable.  The position is kept in cb->args:
 *   args[0] - bucket index in the netns device-index hash table
 *   args[1] - device position within that bucket
 *   args[2] - address position within that device's address list
 *
 * If the request carries IFA_TARGET_NETNSID, addresses are dumped from
 * that namespace instead of the requesting socket's namespace.
 *
 * Returns skb->len, or the error from rtnl_get_net_ns_capable().
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = cb->nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Restore the cursor saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* A request that fails to parse is not an error here: the dump then
	 * simply runs against the caller's own namespace.
	 */
	if (nlmsg_parse(cb->nlh, sizeof(struct ifaddrmsg), tb, IFA_MAX,
			ifa_ipv4_policy, NULL) >= 0) {
		if (tb[IFA_TARGET_NETNSID]) {
			fillargs.netnsid = nla_get_s32(tb[IFA_TARGET_NETNSID]);

			tgt_net = rtnl_get_net_ns_capable(skb->sk,
							  fillargs.netnsid);
			if (IS_ERR(tgt_net))
				return PTR_ERR(tgt_net);
		}
	}

	/* s_idx applies only to the bucket we resume in; later buckets
	 * start from their first device.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value checked by nl_dump_check_consistent(). */
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already fully dumped. */
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start at its first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* Fill failed: save the cursor at this
				 * address so the next call resumes here.
				 */
				if (inet_fill_ifaddr(skb, ifa, &fillargs) < 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;
	/* netnsid >= 0 means tgt_net came from rtnl_get_net_ns_capable();
	 * presumably that took a reference which is dropped here - confirm.
	 */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len;
}
1738 
1739 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1740 		      u32 portid)
1741 {
1742 	struct inet_fill_args fillargs = {
1743 		.portid = portid,
1744 		.seq = nlh ? nlh->nlmsg_seq : 0,
1745 		.event = event,
1746 		.flags = 0,
1747 		.netnsid = -1,
1748 	};
1749 	struct sk_buff *skb;
1750 	int err = -ENOBUFS;
1751 	struct net *net;
1752 
1753 	net = dev_net(ifa->ifa_dev->dev);
1754 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1755 	if (!skb)
1756 		goto errout;
1757 
1758 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1759 	if (err < 0) {
1760 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1761 		WARN_ON(err == -EMSGSIZE);
1762 		kfree_skb(skb);
1763 		goto errout;
1764 	}
1765 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1766 	return;
1767 errout:
1768 	if (err < 0)
1769 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1770 }
1771 
1772 static size_t inet_get_link_af_size(const struct net_device *dev,
1773 				    u32 ext_filter_mask)
1774 {
1775 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1776 
1777 	if (!in_dev)
1778 		return 0;
1779 
1780 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1781 }
1782 
1783 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1784 			     u32 ext_filter_mask)
1785 {
1786 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1787 	struct nlattr *nla;
1788 	int i;
1789 
1790 	if (!in_dev)
1791 		return -ENODATA;
1792 
1793 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1794 	if (!nla)
1795 		return -EMSGSIZE;
1796 
1797 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1798 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1799 
1800 	return 0;
1801 }
1802 
/*
 * Netlink policy used by inet_validate_link_af(): IFLA_INET_CONF must be a
 * nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1806 
1807 static int inet_validate_link_af(const struct net_device *dev,
1808 				 const struct nlattr *nla)
1809 {
1810 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1811 	int err, rem;
1812 
1813 	if (dev && !__in_dev_get_rcu(dev))
1814 		return -EAFNOSUPPORT;
1815 
1816 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1817 	if (err < 0)
1818 		return err;
1819 
1820 	if (tb[IFLA_INET_CONF]) {
1821 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1822 			int cfgid = nla_type(a);
1823 
1824 			if (nla_len(a) < 4)
1825 				return -EINVAL;
1826 
1827 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1828 				return -EINVAL;
1829 		}
1830 	}
1831 
1832 	return 0;
1833 }
1834 
1835 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1836 {
1837 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1838 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1839 	int rem;
1840 
1841 	if (!in_dev)
1842 		return -EAFNOSUPPORT;
1843 
1844 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1845 		BUG();
1846 
1847 	if (tb[IFLA_INET_CONF]) {
1848 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1849 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1850 	}
1851 
1852 	return 0;
1853 }
1854 
1855 static int inet_netconf_msgsize_devconf(int type)
1856 {
1857 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1858 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1859 	bool all = false;
1860 
1861 	if (type == NETCONFA_ALL)
1862 		all = true;
1863 
1864 	if (all || type == NETCONFA_FORWARDING)
1865 		size += nla_total_size(4);
1866 	if (all || type == NETCONFA_RP_FILTER)
1867 		size += nla_total_size(4);
1868 	if (all || type == NETCONFA_MC_FORWARDING)
1869 		size += nla_total_size(4);
1870 	if (all || type == NETCONFA_BC_FORWARDING)
1871 		size += nla_total_size(4);
1872 	if (all || type == NETCONFA_PROXY_NEIGH)
1873 		size += nla_total_size(4);
1874 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1875 		size += nla_total_size(4);
1876 
1877 	return size;
1878 }
1879 
1880 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1881 				     struct ipv4_devconf *devconf, u32 portid,
1882 				     u32 seq, int event, unsigned int flags,
1883 				     int type)
1884 {
1885 	struct nlmsghdr  *nlh;
1886 	struct netconfmsg *ncm;
1887 	bool all = false;
1888 
1889 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1890 			flags);
1891 	if (!nlh)
1892 		return -EMSGSIZE;
1893 
1894 	if (type == NETCONFA_ALL)
1895 		all = true;
1896 
1897 	ncm = nlmsg_data(nlh);
1898 	ncm->ncm_family = AF_INET;
1899 
1900 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1901 		goto nla_put_failure;
1902 
1903 	if (!devconf)
1904 		goto out;
1905 
1906 	if ((all || type == NETCONFA_FORWARDING) &&
1907 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1908 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1909 		goto nla_put_failure;
1910 	if ((all || type == NETCONFA_RP_FILTER) &&
1911 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1912 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1913 		goto nla_put_failure;
1914 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1915 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1916 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1917 		goto nla_put_failure;
1918 	if ((all || type == NETCONFA_BC_FORWARDING) &&
1919 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1920 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1921 		goto nla_put_failure;
1922 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1923 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1924 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1925 		goto nla_put_failure;
1926 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1927 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1928 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1929 		goto nla_put_failure;
1930 
1931 out:
1932 	nlmsg_end(skb, nlh);
1933 	return 0;
1934 
1935 nla_put_failure:
1936 	nlmsg_cancel(skb, nlh);
1937 	return -EMSGSIZE;
1938 }
1939 
1940 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1941 				 int ifindex, struct ipv4_devconf *devconf)
1942 {
1943 	struct sk_buff *skb;
1944 	int err = -ENOBUFS;
1945 
1946 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1947 	if (!skb)
1948 		goto errout;
1949 
1950 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1951 					event, 0, type);
1952 	if (err < 0) {
1953 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1954 		WARN_ON(err == -EMSGSIZE);
1955 		kfree_skb(skb);
1956 		goto errout;
1957 	}
1958 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1959 	return;
1960 errout:
1961 	if (err < 0)
1962 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1963 }
1964 
/*
 * Netlink policy used by inet_netconf_get_devconf() when parsing a netconf
 * request.  Each listed attribute is given a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1972 
1973 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1974 				    struct nlmsghdr *nlh,
1975 				    struct netlink_ext_ack *extack)
1976 {
1977 	struct net *net = sock_net(in_skb->sk);
1978 	struct nlattr *tb[NETCONFA_MAX+1];
1979 	struct netconfmsg *ncm;
1980 	struct sk_buff *skb;
1981 	struct ipv4_devconf *devconf;
1982 	struct in_device *in_dev;
1983 	struct net_device *dev;
1984 	int ifindex;
1985 	int err;
1986 
1987 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1988 			  devconf_ipv4_policy, extack);
1989 	if (err < 0)
1990 		goto errout;
1991 
1992 	err = -EINVAL;
1993 	if (!tb[NETCONFA_IFINDEX])
1994 		goto errout;
1995 
1996 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1997 	switch (ifindex) {
1998 	case NETCONFA_IFINDEX_ALL:
1999 		devconf = net->ipv4.devconf_all;
2000 		break;
2001 	case NETCONFA_IFINDEX_DEFAULT:
2002 		devconf = net->ipv4.devconf_dflt;
2003 		break;
2004 	default:
2005 		dev = __dev_get_by_index(net, ifindex);
2006 		if (!dev)
2007 			goto errout;
2008 		in_dev = __in_dev_get_rtnl(dev);
2009 		if (!in_dev)
2010 			goto errout;
2011 		devconf = &in_dev->cnf;
2012 		break;
2013 	}
2014 
2015 	err = -ENOBUFS;
2016 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2017 	if (!skb)
2018 		goto errout;
2019 
2020 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2021 					NETLINK_CB(in_skb).portid,
2022 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2023 					NETCONFA_ALL);
2024 	if (err < 0) {
2025 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2026 		WARN_ON(err == -EMSGSIZE);
2027 		kfree_skb(skb);
2028 		goto errout;
2029 	}
2030 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2031 errout:
2032 	return err;
2033 }
2034 
/*
 * Netlink dump callback: emit one RTM_NEWNETCONF message per IPv4-enabled
 * device, followed by one for the "all" settings and one for the "default"
 * settings.
 *
 * The dump is resumable.  The position is kept in cb->args:
 *   args[0] - bucket index in the device-index hash table; the values
 *             NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 stand for the
 *             "all" and "default" pseudo entries still to be sent
 *   args[1] - device position within the current bucket
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	/* Restore the cursor saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/* s_idx applies only to the bucket we resume in; later buckets
	 * start from their first device.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value checked by nl_dump_check_consistent(). */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped. */
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			/* Fill failed: save the cursor at this device so
			 * the next call resumes here.
			 */
			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All devices done: send the "all" pseudo entry. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Then the "default" pseudo entry. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2105 
2106 #ifdef CONFIG_SYSCTL
2107 
2108 static void devinet_copy_dflt_conf(struct net *net, int i)
2109 {
2110 	struct net_device *dev;
2111 
2112 	rcu_read_lock();
2113 	for_each_netdev_rcu(net, dev) {
2114 		struct in_device *in_dev;
2115 
2116 		in_dev = __in_dev_get_rcu(dev);
2117 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2118 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2119 	}
2120 	rcu_read_unlock();
2121 }
2122 
2123 /* called with RTNL locked */
2124 static void inet_forward_change(struct net *net)
2125 {
2126 	struct net_device *dev;
2127 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2128 
2129 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2130 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2131 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2132 				    NETCONFA_FORWARDING,
2133 				    NETCONFA_IFINDEX_ALL,
2134 				    net->ipv4.devconf_all);
2135 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2136 				    NETCONFA_FORWARDING,
2137 				    NETCONFA_IFINDEX_DEFAULT,
2138 				    net->ipv4.devconf_dflt);
2139 
2140 	for_each_netdev(net, dev) {
2141 		struct in_device *in_dev;
2142 
2143 		if (on)
2144 			dev_disable_lro(dev);
2145 
2146 		in_dev = __in_dev_get_rtnl(dev);
2147 		if (in_dev) {
2148 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2149 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2150 						    NETCONFA_FORWARDING,
2151 						    dev->ifindex, &in_dev->cnf);
2152 		}
2153 	}
2154 }
2155 
2156 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2157 {
2158 	if (cnf == net->ipv4.devconf_dflt)
2159 		return NETCONFA_IFINDEX_DEFAULT;
2160 	else if (cnf == net->ipv4.devconf_all)
2161 		return NETCONFA_IFINDEX_ALL;
2162 	else {
2163 		struct in_device *idev
2164 			= container_of(cnf, struct in_device, cnf);
2165 		return idev->dev->ifindex;
2166 	}
2167 }
2168 
2169 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2170 			     void __user *buffer,
2171 			     size_t *lenp, loff_t *ppos)
2172 {
2173 	int old_value = *(int *)ctl->data;
2174 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2175 	int new_value = *(int *)ctl->data;
2176 
2177 	if (write) {
2178 		struct ipv4_devconf *cnf = ctl->extra1;
2179 		struct net *net = ctl->extra2;
2180 		int i = (int *)ctl->data - cnf->data;
2181 		int ifindex;
2182 
2183 		set_bit(i, cnf->state);
2184 
2185 		if (cnf == net->ipv4.devconf_dflt)
2186 			devinet_copy_dflt_conf(net, i);
2187 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2188 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2189 			if ((new_value == 0) && (old_value != 0))
2190 				rt_cache_flush(net);
2191 
2192 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2193 		    new_value != old_value)
2194 			rt_cache_flush(net);
2195 
2196 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2197 		    new_value != old_value) {
2198 			ifindex = devinet_conf_ifindex(net, cnf);
2199 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2200 						    NETCONFA_RP_FILTER,
2201 						    ifindex, cnf);
2202 		}
2203 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2204 		    new_value != old_value) {
2205 			ifindex = devinet_conf_ifindex(net, cnf);
2206 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2207 						    NETCONFA_PROXY_NEIGH,
2208 						    ifindex, cnf);
2209 		}
2210 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2211 		    new_value != old_value) {
2212 			ifindex = devinet_conf_ifindex(net, cnf);
2213 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2214 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2215 						    ifindex, cnf);
2216 		}
2217 	}
2218 
2219 	return ret;
2220 }
2221 
2222 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2223 				  void __user *buffer,
2224 				  size_t *lenp, loff_t *ppos)
2225 {
2226 	int *valp = ctl->data;
2227 	int val = *valp;
2228 	loff_t pos = *ppos;
2229 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2230 
2231 	if (write && *valp != val) {
2232 		struct net *net = ctl->extra2;
2233 
2234 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2235 			if (!rtnl_trylock()) {
2236 				/* Restore the original values before restarting */
2237 				*valp = val;
2238 				*ppos = pos;
2239 				return restart_syscall();
2240 			}
2241 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2242 				inet_forward_change(net);
2243 			} else {
2244 				struct ipv4_devconf *cnf = ctl->extra1;
2245 				struct in_device *idev =
2246 					container_of(cnf, struct in_device, cnf);
2247 				if (*valp)
2248 					dev_disable_lro(idev->dev);
2249 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2250 							    NETCONFA_FORWARDING,
2251 							    idev->dev->ifindex,
2252 							    cnf);
2253 			}
2254 			rtnl_unlock();
2255 			rt_cache_flush(net);
2256 		} else
2257 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2258 						    NETCONFA_FORWARDING,
2259 						    NETCONFA_IFINDEX_DEFAULT,
2260 						    net->ipv4.devconf_dflt);
2261 	}
2262 
2263 	return ret;
2264 }
2265 
2266 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2267 				void __user *buffer,
2268 				size_t *lenp, loff_t *ppos)
2269 {
2270 	int *valp = ctl->data;
2271 	int val = *valp;
2272 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2273 	struct net *net = ctl->extra2;
2274 
2275 	if (write && *valp != val)
2276 		rt_cache_flush(net);
2277 
2278 	return ret;
2279 }
2280 
/*
 * Build one ctl_table entry for the devconf attribute IPV4_DEVCONF_<attr>.
 *
 * .data and .extra1 point into the global ipv4_devconf template. The
 * IPV4_DEVCONF_* constants are used minus one as the index into data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2303 
2304 static struct devinet_sysctl_table {
2305 	struct ctl_table_header *sysctl_header;
2306 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2307 } devinet_sysctl = {
2308 	.devinet_vars = {
2309 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2310 					     devinet_sysctl_forward),
2311 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2312 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2313 
2314 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2315 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2316 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2317 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2318 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2319 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2320 					"accept_source_route"),
2321 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2322 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2323 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2324 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2325 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2326 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2327 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2328 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2329 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2330 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2331 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2332 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2333 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2334 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2335 					"force_igmp_version"),
2336 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2337 					"igmpv2_unsolicited_report_interval"),
2338 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2339 					"igmpv3_unsolicited_report_interval"),
2340 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2341 					"ignore_routes_with_linkdown"),
2342 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2343 					"drop_gratuitous_arp"),
2344 
2345 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2346 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2347 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2348 					      "promote_secondaries"),
2349 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2350 					      "route_localnet"),
2351 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2352 					      "drop_unicast_in_l2_multicast"),
2353 	},
2354 };
2355 
2356 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2357 				     int ifindex, struct ipv4_devconf *p)
2358 {
2359 	int i;
2360 	struct devinet_sysctl_table *t;
2361 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2362 
2363 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2364 	if (!t)
2365 		goto out;
2366 
2367 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2368 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2369 		t->devinet_vars[i].extra1 = p;
2370 		t->devinet_vars[i].extra2 = net;
2371 	}
2372 
2373 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2374 
2375 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2376 	if (!t->sysctl_header)
2377 		goto free;
2378 
2379 	p->sysctl = t;
2380 
2381 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2382 				    ifindex, p);
2383 	return 0;
2384 
2385 free:
2386 	kfree(t);
2387 out:
2388 	return -ENOBUFS;
2389 }
2390 
2391 static void __devinet_sysctl_unregister(struct net *net,
2392 					struct ipv4_devconf *cnf, int ifindex)
2393 {
2394 	struct devinet_sysctl_table *t = cnf->sysctl;
2395 
2396 	if (t) {
2397 		cnf->sysctl = NULL;
2398 		unregister_net_sysctl_table(t->sysctl_header);
2399 		kfree(t);
2400 	}
2401 
2402 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2403 }
2404 
2405 static int devinet_sysctl_register(struct in_device *idev)
2406 {
2407 	int err;
2408 
2409 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2410 		return -EINVAL;
2411 
2412 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2413 	if (err)
2414 		return err;
2415 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2416 					idev->dev->ifindex, &idev->cnf);
2417 	if (err)
2418 		neigh_sysctl_unregister(idev->arp_parms);
2419 	return err;
2420 }
2421 
2422 static void devinet_sysctl_unregister(struct in_device *idev)
2423 {
2424 	struct net *net = dev_net(idev->dev);
2425 
2426 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2427 	neigh_sysctl_unregister(idev->arp_parms);
2428 }
2429 
2430 static struct ctl_table ctl_forward_entry[] = {
2431 	{
2432 		.procname	= "ip_forward",
2433 		.data		= &ipv4_devconf.data[
2434 					IPV4_DEVCONF_FORWARDING - 1],
2435 		.maxlen		= sizeof(int),
2436 		.mode		= 0644,
2437 		.proc_handler	= devinet_sysctl_forward,
2438 		.extra1		= &ipv4_devconf,
2439 		.extra2		= &init_net,
2440 	},
2441 	{ },
2442 };
2443 #endif
2444 
2445 static __net_init int devinet_init_net(struct net *net)
2446 {
2447 	int err;
2448 	struct ipv4_devconf *all, *dflt;
2449 #ifdef CONFIG_SYSCTL
2450 	struct ctl_table *tbl = ctl_forward_entry;
2451 	struct ctl_table_header *forw_hdr;
2452 #endif
2453 
2454 	err = -ENOMEM;
2455 	all = &ipv4_devconf;
2456 	dflt = &ipv4_devconf_dflt;
2457 
2458 	if (!net_eq(net, &init_net)) {
2459 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2460 		if (!all)
2461 			goto err_alloc_all;
2462 
2463 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2464 		if (!dflt)
2465 			goto err_alloc_dflt;
2466 
2467 #ifdef CONFIG_SYSCTL
2468 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2469 		if (!tbl)
2470 			goto err_alloc_ctl;
2471 
2472 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2473 		tbl[0].extra1 = all;
2474 		tbl[0].extra2 = net;
2475 #endif
2476 	}
2477 
2478 #ifdef CONFIG_SYSCTL
2479 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2480 	if (err < 0)
2481 		goto err_reg_all;
2482 
2483 	err = __devinet_sysctl_register(net, "default",
2484 					NETCONFA_IFINDEX_DEFAULT, dflt);
2485 	if (err < 0)
2486 		goto err_reg_dflt;
2487 
2488 	err = -ENOMEM;
2489 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2490 	if (!forw_hdr)
2491 		goto err_reg_ctl;
2492 	net->ipv4.forw_hdr = forw_hdr;
2493 #endif
2494 
2495 	net->ipv4.devconf_all = all;
2496 	net->ipv4.devconf_dflt = dflt;
2497 	return 0;
2498 
2499 #ifdef CONFIG_SYSCTL
2500 err_reg_ctl:
2501 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2502 err_reg_dflt:
2503 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2504 err_reg_all:
2505 	if (tbl != ctl_forward_entry)
2506 		kfree(tbl);
2507 err_alloc_ctl:
2508 #endif
2509 	if (dflt != &ipv4_devconf_dflt)
2510 		kfree(dflt);
2511 err_alloc_dflt:
2512 	if (all != &ipv4_devconf)
2513 		kfree(all);
2514 err_alloc_all:
2515 	return err;
2516 }
2517 
2518 static __net_exit void devinet_exit_net(struct net *net)
2519 {
2520 #ifdef CONFIG_SYSCTL
2521 	struct ctl_table *tbl;
2522 
2523 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2524 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2525 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2526 				    NETCONFA_IFINDEX_DEFAULT);
2527 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2528 				    NETCONFA_IFINDEX_ALL);
2529 	kfree(tbl);
2530 #endif
2531 	kfree(net->ipv4.devconf_dflt);
2532 	kfree(net->ipv4.devconf_all);
2533 }
2534 
2535 static __net_initdata struct pernet_operations devinet_ops = {
2536 	.init = devinet_init_net,
2537 	.exit = devinet_exit_net,
2538 };
2539 
2540 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2541 	.family		  = AF_INET,
2542 	.fill_link_af	  = inet_fill_link_af,
2543 	.get_link_af_size = inet_get_link_af_size,
2544 	.validate_link_af = inet_validate_link_af,
2545 	.set_link_af	  = inet_set_link_af,
2546 };
2547 
2548 void __init devinet_init(void)
2549 {
2550 	int i;
2551 
2552 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2553 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2554 
2555 	register_pernet_subsys(&devinet_ops);
2556 
2557 	register_gifconf(PF_INET, inet_gifconf);
2558 	register_netdevice_notifier(&ip_netdev_notifier);
2559 
2560 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2561 
2562 	rtnl_af_register(&inet_af_ops);
2563 
2564 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2565 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2566 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2567 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2568 		      inet_netconf_dump_devconf, 0);
2569 }
2570