xref: /openbmc/linux/net/ipv4/devinet.c (revision fe4549b1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/*
 * Address flags that only make sense for IPv6.  __inet_insert_ifa() masks
 * them out of every IPv4 address before it is linked in.
 */
#define IPV6ONLY_FLAGS			\
	(IFA_F_NODAD |			\
	 IFA_F_OPTIMISTIC |		\
	 IFA_F_DADFAILED |		\
	 IFA_F_HOMEADDRESS |		\
	 IFA_F_TENTATIVE |		\
	 IFA_F_MANAGETEMPADDR |		\
	 IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/*
 * Access attribute @attr in the default devconf table of namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not just a
 * plain identifier or call) can be passed without being misparsed.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
#ifndef CONFIG_SYSCTL
/* No sysctl support: registration trivially succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}

/* No sysctl support: nothing to tear down. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#else
/* Real implementations are provided elsewhere in this file. */
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
352 static void __inet_del_ifa(struct in_device *in_dev,
353 			   struct in_ifaddr __rcu **ifap,
354 			   int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356 	struct in_ifaddr *promote = NULL;
357 	struct in_ifaddr *ifa, *ifa1;
358 	struct in_ifaddr __rcu **last_prim;
359 	struct in_ifaddr *prev_prom = NULL;
360 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361 
362 	ASSERT_RTNL();
363 
364 	ifa1 = rtnl_dereference(*ifap);
365 	last_prim = ifap;
366 	if (in_dev->dead)
367 		goto no_promotions;
368 
369 	/* 1. Deleting primary ifaddr forces deletion all secondaries
370 	 * unless alias promotion is set
371 	 **/
372 
373 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 
376 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 			    ifa1->ifa_scope <= ifa->ifa_scope)
379 				last_prim = &ifa->ifa_next;
380 
381 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 			    ifa1->ifa_mask != ifa->ifa_mask ||
383 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 				ifap1 = &ifa->ifa_next;
385 				prev_prom = ifa;
386 				continue;
387 			}
388 
389 			if (!do_promote) {
390 				inet_hash_remove(ifa);
391 				*ifap1 = ifa->ifa_next;
392 
393 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 				blocking_notifier_call_chain(&inetaddr_chain,
395 						NETDEV_DOWN, ifa);
396 				inet_free_ifa(ifa);
397 			} else {
398 				promote = ifa;
399 				break;
400 			}
401 		}
402 	}
403 
404 	/* On promotion all secondaries from subnet are changing
405 	 * the primary IP, we must remove all their routes silently
406 	 * and later to add them back with new prefsrc. Do this
407 	 * while all addresses are on the device list.
408 	 */
409 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 		if (ifa1->ifa_mask == ifa->ifa_mask &&
411 		    inet_ifa_match(ifa1->ifa_address, ifa))
412 			fib_del_ifaddr(ifa, ifa1);
413 	}
414 
415 no_promotions:
416 	/* 2. Unlink it */
417 
418 	*ifap = ifa1->ifa_next;
419 	inet_hash_remove(ifa1);
420 
421 	/* 3. Announce address deletion */
422 
423 	/* Send message first, then call notifier.
424 	   At first sight, FIB update triggered by notifier
425 	   will refer to already deleted ifaddr, that could confuse
426 	   netlink listeners. It is not true: look, gated sees
427 	   that route deleted and if it still thinks that ifaddr
428 	   is valid, it will try to restore deleted routes... Grr.
429 	   So that, this order is correct.
430 	 */
431 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433 
434 	if (promote) {
435 		struct in_ifaddr *next_sec;
436 
437 		next_sec = rtnl_dereference(promote->ifa_next);
438 		if (prev_prom) {
439 			struct in_ifaddr *last_sec;
440 
441 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 
443 			last_sec = rtnl_dereference(*last_prim);
444 			rcu_assign_pointer(promote->ifa_next, last_sec);
445 			rcu_assign_pointer(*last_prim, promote);
446 		}
447 
448 		promote->ifa_flags &= ~IFA_F_SECONDARY;
449 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 		blocking_notifier_call_chain(&inetaddr_chain,
451 				NETDEV_UP, promote);
452 		for (ifa = next_sec; ifa;
453 		     ifa = rtnl_dereference(ifa->ifa_next)) {
454 			if (ifa1->ifa_mask != ifa->ifa_mask ||
455 			    !inet_ifa_match(ifa1->ifa_address, ifa))
456 					continue;
457 			fib_add_ifaddr(ifa);
458 		}
459 
460 	}
461 	if (destroy)
462 		inet_free_ifa(ifa1);
463 }
464 
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
/* Address lifetime scanner; defined below, declared here for the work item. */
static void check_lifetime(struct work_struct *work);

/* Delayed work that runs check_lifetime(); it is (re)queued whenever an
 * address is inserted or has its lifetime updated.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6 only flags to IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513 				inet_free_ifa(ifa);
514 				return -EINVAL;
515 			}
516 			ifa->ifa_flags |= IFA_F_SECONDARY;
517 		}
518 
519 		ifap = &ifa1->ifa_next;
520 		ifa1 = rtnl_dereference(*ifap);
521 	}
522 
523 	/* Allow any devices that wish to register ifaddr validtors to weigh
524 	 * in now, before changes are committed.  The rntl lock is serializing
525 	 * access here, so the state should not change between a validator call
526 	 * and a final notify on commit.  This isn't invoked on promotion under
527 	 * the assumption that validators are checking the address itself, and
528 	 * not the flags.
529 	 */
530 	ivi.ivi_addr = ifa->ifa_address;
531 	ivi.ivi_dev = ifa->ifa_dev;
532 	ivi.extack = extack;
533 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534 					   NETDEV_UP, &ivi);
535 	ret = notifier_to_errno(ret);
536 	if (ret) {
537 		inet_free_ifa(ifa);
538 		return ret;
539 	}
540 
541 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542 		ifap = last_primary;
543 
544 	rcu_assign_pointer(ifa->ifa_next, *ifap);
545 	rcu_assign_pointer(*ifap, ifa);
546 
547 	inet_hash_insert(dev_net(in_dev->dev), ifa);
548 
549 	cancel_delayed_work(&check_lifetime_work);
550 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551 
552 	/* Send message first, then call notifier.
553 	   Notifier will trigger FIB update, so that
554 	   listeners of netlink will know about new ifaddr */
555 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557 
558 	return 0;
559 }
560 
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565 
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	if (!in_dev) {
573 		inet_free_ifa(ifa);
574 		return -ENOBUFS;
575 	}
576 	ipv4_devconf_setall(in_dev);
577 	neigh_parms_data_state_setall(in_dev->arp_parms);
578 	if (ifa->ifa_dev != in_dev) {
579 		WARN_ON(ifa->ifa_dev);
580 		in_dev_hold(in_dev);
581 		ifa->ifa_dev = in_dev;
582 	}
583 	if (ipv4_is_loopback(ifa->ifa_local))
584 		ifa->ifa_scope = RT_SCOPE_HOST;
585 	return inet_insert_ifa(ifa);
586 }
587 
588 /* Caller must hold RCU or RTNL :
589  * We dont take a reference on found in_device
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593 	struct net_device *dev;
594 	struct in_device *in_dev = NULL;
595 
596 	rcu_read_lock();
597 	dev = dev_get_by_index_rcu(net, ifindex);
598 	if (dev)
599 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600 	rcu_read_unlock();
601 	return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604 
605 /* Called only from RTNL semaphored context. No locks. */
606 
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 				    __be32 mask)
609 {
610 	struct in_ifaddr *ifa;
611 
612 	ASSERT_RTNL();
613 
614 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616 			return ifa;
617 	}
618 	return NULL;
619 }
620 
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 				 const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625 	struct ip_mreqn mreq = {
626 		.imr_multiaddr.s_addr = ifa->ifa_address,
627 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628 	};
629 	struct sock *sk = net->ipv4.mc_autojoin_sk;
630 	int ret;
631 
632 	ASSERT_RTNL();
633 
634 	lock_sock(sk);
635 	if (join)
636 		ret = ip_mc_join_group(sk, &mreq);
637 	else
638 		ret = ip_mc_leave_group(sk, &mreq);
639 	release_sock(sk);
640 
641 	return ret;
642 #else
643 	return -EOPNOTSUPP;
644 #endif
645 }
646 
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 			    struct netlink_ext_ack *extack)
649 {
650 	struct net *net = sock_net(skb->sk);
651 	struct in_ifaddr __rcu **ifap;
652 	struct nlattr *tb[IFA_MAX+1];
653 	struct in_device *in_dev;
654 	struct ifaddrmsg *ifm;
655 	struct in_ifaddr *ifa;
656 	int err;
657 
658 	ASSERT_RTNL();
659 
660 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 				     ifa_ipv4_policy, extack);
662 	if (err < 0)
663 		goto errout;
664 
665 	ifm = nlmsg_data(nlh);
666 	in_dev = inetdev_by_index(net, ifm->ifa_index);
667 	if (!in_dev) {
668 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669 		err = -ENODEV;
670 		goto errout;
671 	}
672 
673 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674 	     ifap = &ifa->ifa_next) {
675 		if (tb[IFA_LOCAL] &&
676 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677 			continue;
678 
679 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680 			continue;
681 
682 		if (tb[IFA_ADDRESS] &&
683 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685 			continue;
686 
687 		if (ipv4_is_multicast(ifa->ifa_address))
688 			ip_mc_autojoin_config(net, false, ifa);
689 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690 		return 0;
691 	}
692 
693 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694 	err = -EADDRNOTAVAIL;
695 errout:
696 	return err;
697 }
698 
/* Lifetime value meaning "never expires" for valid/preferred lifetimes. */
#define INFINITY_LIFE_TIME 0xFFFFFFFF
700 
701 static void check_lifetime(struct work_struct *work)
702 {
703 	unsigned long now, next, next_sec, next_sched;
704 	struct in_ifaddr *ifa;
705 	struct hlist_node *n;
706 	int i;
707 
708 	now = jiffies;
709 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710 
711 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712 		bool change_needed = false;
713 
714 		rcu_read_lock();
715 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716 			unsigned long age;
717 
718 			if (ifa->ifa_flags & IFA_F_PERMANENT)
719 				continue;
720 
721 			/* We try to batch several events at once. */
722 			age = (now - ifa->ifa_tstamp +
723 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
724 
725 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
726 			    age >= ifa->ifa_valid_lft) {
727 				change_needed = true;
728 			} else if (ifa->ifa_preferred_lft ==
729 				   INFINITY_LIFE_TIME) {
730 				continue;
731 			} else if (age >= ifa->ifa_preferred_lft) {
732 				if (time_before(ifa->ifa_tstamp +
733 						ifa->ifa_valid_lft * HZ, next))
734 					next = ifa->ifa_tstamp +
735 					       ifa->ifa_valid_lft * HZ;
736 
737 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
738 					change_needed = true;
739 			} else if (time_before(ifa->ifa_tstamp +
740 					       ifa->ifa_preferred_lft * HZ,
741 					       next)) {
742 				next = ifa->ifa_tstamp +
743 				       ifa->ifa_preferred_lft * HZ;
744 			}
745 		}
746 		rcu_read_unlock();
747 		if (!change_needed)
748 			continue;
749 		rtnl_lock();
750 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
751 			unsigned long age;
752 
753 			if (ifa->ifa_flags & IFA_F_PERMANENT)
754 				continue;
755 
756 			/* We try to batch several events at once. */
757 			age = (now - ifa->ifa_tstamp +
758 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
759 
760 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
761 			    age >= ifa->ifa_valid_lft) {
762 				struct in_ifaddr __rcu **ifap;
763 				struct in_ifaddr *tmp;
764 
765 				ifap = &ifa->ifa_dev->ifa_list;
766 				tmp = rtnl_dereference(*ifap);
767 				while (tmp) {
768 					if (tmp == ifa) {
769 						inet_del_ifa(ifa->ifa_dev,
770 							     ifap, 1);
771 						break;
772 					}
773 					ifap = &tmp->ifa_next;
774 					tmp = rtnl_dereference(*ifap);
775 				}
776 			} else if (ifa->ifa_preferred_lft !=
777 				   INFINITY_LIFE_TIME &&
778 				   age >= ifa->ifa_preferred_lft &&
779 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
780 				ifa->ifa_flags |= IFA_F_DEPRECATED;
781 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
782 			}
783 		}
784 		rtnl_unlock();
785 	}
786 
787 	next_sec = round_jiffies_up(next);
788 	next_sched = next;
789 
790 	/* If rounded timeout is accurate enough, accept it. */
791 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
792 		next_sched = next_sec;
793 
794 	now = jiffies;
795 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
796 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
797 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
798 
799 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
800 			next_sched - now);
801 }
802 
803 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
804 			     __u32 prefered_lft)
805 {
806 	unsigned long timeout;
807 
808 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
809 
810 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
811 	if (addrconf_finite_timeout(timeout))
812 		ifa->ifa_valid_lft = timeout;
813 	else
814 		ifa->ifa_flags |= IFA_F_PERMANENT;
815 
816 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
817 	if (addrconf_finite_timeout(timeout)) {
818 		if (timeout == 0)
819 			ifa->ifa_flags |= IFA_F_DEPRECATED;
820 		ifa->ifa_preferred_lft = timeout;
821 	}
822 	ifa->ifa_tstamp = jiffies;
823 	if (!ifa->ifa_cstamp)
824 		ifa->ifa_cstamp = ifa->ifa_tstamp;
825 }
826 
827 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
828 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
829 				       struct netlink_ext_ack *extack)
830 {
831 	struct nlattr *tb[IFA_MAX+1];
832 	struct in_ifaddr *ifa;
833 	struct ifaddrmsg *ifm;
834 	struct net_device *dev;
835 	struct in_device *in_dev;
836 	int err;
837 
838 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
839 				     ifa_ipv4_policy, extack);
840 	if (err < 0)
841 		goto errout;
842 
843 	ifm = nlmsg_data(nlh);
844 	err = -EINVAL;
845 
846 	if (ifm->ifa_prefixlen > 32) {
847 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
848 		goto errout;
849 	}
850 
851 	if (!tb[IFA_LOCAL]) {
852 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
853 		goto errout;
854 	}
855 
856 	dev = __dev_get_by_index(net, ifm->ifa_index);
857 	err = -ENODEV;
858 	if (!dev) {
859 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
860 		goto errout;
861 	}
862 
863 	in_dev = __in_dev_get_rtnl(dev);
864 	err = -ENOBUFS;
865 	if (!in_dev)
866 		goto errout;
867 
868 	ifa = inet_alloc_ifa();
869 	if (!ifa)
870 		/*
871 		 * A potential indev allocation can be left alive, it stays
872 		 * assigned to its device and is destroy with it.
873 		 */
874 		goto errout;
875 
876 	ipv4_devconf_setall(in_dev);
877 	neigh_parms_data_state_setall(in_dev->arp_parms);
878 	in_dev_hold(in_dev);
879 
880 	if (!tb[IFA_ADDRESS])
881 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
882 
883 	INIT_HLIST_NODE(&ifa->hash);
884 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
885 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
886 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
887 					 ifm->ifa_flags;
888 	ifa->ifa_scope = ifm->ifa_scope;
889 	ifa->ifa_dev = in_dev;
890 
891 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
892 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
893 
894 	if (tb[IFA_BROADCAST])
895 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
896 
897 	if (tb[IFA_LABEL])
898 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
899 	else
900 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
901 
902 	if (tb[IFA_RT_PRIORITY])
903 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
904 
905 	if (tb[IFA_PROTO])
906 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
907 
908 	if (tb[IFA_CACHEINFO]) {
909 		struct ifa_cacheinfo *ci;
910 
911 		ci = nla_data(tb[IFA_CACHEINFO]);
912 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
913 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
914 			err = -EINVAL;
915 			goto errout_free;
916 		}
917 		*pvalid_lft = ci->ifa_valid;
918 		*pprefered_lft = ci->ifa_prefered;
919 	}
920 
921 	return ifa;
922 
923 errout_free:
924 	inet_free_ifa(ifa);
925 errout:
926 	return ERR_PTR(err);
927 }
928 
929 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
930 {
931 	struct in_device *in_dev = ifa->ifa_dev;
932 	struct in_ifaddr *ifa1;
933 
934 	if (!ifa->ifa_local)
935 		return NULL;
936 
937 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
938 		if (ifa1->ifa_mask == ifa->ifa_mask &&
939 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
940 		    ifa1->ifa_local == ifa->ifa_local)
941 			return ifa1;
942 	}
943 	return NULL;
944 }
945 
946 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
947 			    struct netlink_ext_ack *extack)
948 {
949 	struct net *net = sock_net(skb->sk);
950 	struct in_ifaddr *ifa;
951 	struct in_ifaddr *ifa_existing;
952 	__u32 valid_lft = INFINITY_LIFE_TIME;
953 	__u32 prefered_lft = INFINITY_LIFE_TIME;
954 
955 	ASSERT_RTNL();
956 
957 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
958 	if (IS_ERR(ifa))
959 		return PTR_ERR(ifa);
960 
961 	ifa_existing = find_matching_ifa(ifa);
962 	if (!ifa_existing) {
963 		/* It would be best to check for !NLM_F_CREATE here but
964 		 * userspace already relies on not having to provide this.
965 		 */
966 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
967 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
968 			int ret = ip_mc_autojoin_config(net, true, ifa);
969 
970 			if (ret < 0) {
971 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
972 				inet_free_ifa(ifa);
973 				return ret;
974 			}
975 		}
976 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
977 					 extack);
978 	} else {
979 		u32 new_metric = ifa->ifa_rt_priority;
980 		u8 new_proto = ifa->ifa_proto;
981 
982 		inet_free_ifa(ifa);
983 
984 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
985 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
986 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
987 			return -EEXIST;
988 		}
989 		ifa = ifa_existing;
990 
991 		if (ifa->ifa_rt_priority != new_metric) {
992 			fib_modify_prefix_metric(ifa, new_metric);
993 			ifa->ifa_rt_priority = new_metric;
994 		}
995 
996 		ifa->ifa_proto = new_proto;
997 
998 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
999 		cancel_delayed_work(&check_lifetime_work);
1000 		queue_delayed_work(system_power_efficient_wq,
1001 				&check_lifetime_work, 0);
1002 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1003 	}
1004 	return 0;
1005 }
1006 
1007 /*
1008  *	Determine a default network mask, based on the IP address.
1009  */
1010 
1011 static int inet_abc_len(__be32 addr)
1012 {
1013 	int rc = -1;	/* Something else, probably a multicast. */
1014 
1015 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1016 		rc = 0;
1017 	else {
1018 		__u32 haddr = ntohl(addr);
1019 		if (IN_CLASSA(haddr))
1020 			rc = 8;
1021 		else if (IN_CLASSB(haddr))
1022 			rc = 16;
1023 		else if (IN_CLASSC(haddr))
1024 			rc = 24;
1025 		else if (IN_CLASSE(haddr))
1026 			rc = 32;
1027 	}
1028 
1029 	return rc;
1030 }
1031 
1032 
1033 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1034 {
1035 	struct sockaddr_in sin_orig;
1036 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1037 	struct in_ifaddr __rcu **ifap = NULL;
1038 	struct in_device *in_dev;
1039 	struct in_ifaddr *ifa = NULL;
1040 	struct net_device *dev;
1041 	char *colon;
1042 	int ret = -EFAULT;
1043 	int tryaddrmatch = 0;
1044 
1045 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1046 
1047 	/* save original address for comparison */
1048 	memcpy(&sin_orig, sin, sizeof(*sin));
1049 
1050 	colon = strchr(ifr->ifr_name, ':');
1051 	if (colon)
1052 		*colon = 0;
1053 
1054 	dev_load(net, ifr->ifr_name);
1055 
1056 	switch (cmd) {
1057 	case SIOCGIFADDR:	/* Get interface address */
1058 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1059 	case SIOCGIFDSTADDR:	/* Get the destination address */
1060 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1061 		/* Note that these ioctls will not sleep,
1062 		   so that we do not impose a lock.
1063 		   One day we will be forced to put shlock here (I mean SMP)
1064 		 */
1065 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1066 		memset(sin, 0, sizeof(*sin));
1067 		sin->sin_family = AF_INET;
1068 		break;
1069 
1070 	case SIOCSIFFLAGS:
1071 		ret = -EPERM;
1072 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1073 			goto out;
1074 		break;
1075 	case SIOCSIFADDR:	/* Set interface address (and family) */
1076 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1077 	case SIOCSIFDSTADDR:	/* Set the destination address */
1078 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1079 		ret = -EPERM;
1080 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1081 			goto out;
1082 		ret = -EINVAL;
1083 		if (sin->sin_family != AF_INET)
1084 			goto out;
1085 		break;
1086 	default:
1087 		ret = -EINVAL;
1088 		goto out;
1089 	}
1090 
1091 	rtnl_lock();
1092 
1093 	ret = -ENODEV;
1094 	dev = __dev_get_by_name(net, ifr->ifr_name);
1095 	if (!dev)
1096 		goto done;
1097 
1098 	if (colon)
1099 		*colon = ':';
1100 
1101 	in_dev = __in_dev_get_rtnl(dev);
1102 	if (in_dev) {
1103 		if (tryaddrmatch) {
1104 			/* Matthias Andree */
1105 			/* compare label and address (4.4BSD style) */
1106 			/* note: we only do this for a limited set of ioctls
1107 			   and only if the original address family was AF_INET.
1108 			   This is checked above. */
1109 
1110 			for (ifap = &in_dev->ifa_list;
1111 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1112 			     ifap = &ifa->ifa_next) {
1113 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1114 				    sin_orig.sin_addr.s_addr ==
1115 							ifa->ifa_local) {
1116 					break; /* found */
1117 				}
1118 			}
1119 		}
1120 		/* we didn't get a match, maybe the application is
1121 		   4.3BSD-style and passed in junk so we fall back to
1122 		   comparing just the label */
1123 		if (!ifa) {
1124 			for (ifap = &in_dev->ifa_list;
1125 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1126 			     ifap = &ifa->ifa_next)
1127 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1128 					break;
1129 		}
1130 	}
1131 
1132 	ret = -EADDRNOTAVAIL;
1133 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1134 		goto done;
1135 
1136 	switch (cmd) {
1137 	case SIOCGIFADDR:	/* Get interface address */
1138 		ret = 0;
1139 		sin->sin_addr.s_addr = ifa->ifa_local;
1140 		break;
1141 
1142 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1143 		ret = 0;
1144 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1145 		break;
1146 
1147 	case SIOCGIFDSTADDR:	/* Get the destination address */
1148 		ret = 0;
1149 		sin->sin_addr.s_addr = ifa->ifa_address;
1150 		break;
1151 
1152 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1153 		ret = 0;
1154 		sin->sin_addr.s_addr = ifa->ifa_mask;
1155 		break;
1156 
1157 	case SIOCSIFFLAGS:
1158 		if (colon) {
1159 			ret = -EADDRNOTAVAIL;
1160 			if (!ifa)
1161 				break;
1162 			ret = 0;
1163 			if (!(ifr->ifr_flags & IFF_UP))
1164 				inet_del_ifa(in_dev, ifap, 1);
1165 			break;
1166 		}
1167 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1168 		break;
1169 
1170 	case SIOCSIFADDR:	/* Set interface address (and family) */
1171 		ret = -EINVAL;
1172 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1173 			break;
1174 
1175 		if (!ifa) {
1176 			ret = -ENOBUFS;
1177 			ifa = inet_alloc_ifa();
1178 			if (!ifa)
1179 				break;
1180 			INIT_HLIST_NODE(&ifa->hash);
1181 			if (colon)
1182 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1183 			else
1184 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1185 		} else {
1186 			ret = 0;
1187 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1188 				break;
1189 			inet_del_ifa(in_dev, ifap, 0);
1190 			ifa->ifa_broadcast = 0;
1191 			ifa->ifa_scope = 0;
1192 		}
1193 
1194 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1195 
1196 		if (!(dev->flags & IFF_POINTOPOINT)) {
1197 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1198 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1199 			if ((dev->flags & IFF_BROADCAST) &&
1200 			    ifa->ifa_prefixlen < 31)
1201 				ifa->ifa_broadcast = ifa->ifa_address |
1202 						     ~ifa->ifa_mask;
1203 		} else {
1204 			ifa->ifa_prefixlen = 32;
1205 			ifa->ifa_mask = inet_make_mask(32);
1206 		}
1207 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1208 		ret = inet_set_ifa(dev, ifa);
1209 		break;
1210 
1211 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1212 		ret = 0;
1213 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1214 			inet_del_ifa(in_dev, ifap, 0);
1215 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1216 			inet_insert_ifa(ifa);
1217 		}
1218 		break;
1219 
1220 	case SIOCSIFDSTADDR:	/* Set the destination address */
1221 		ret = 0;
1222 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1223 			break;
1224 		ret = -EINVAL;
1225 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1226 			break;
1227 		ret = 0;
1228 		inet_del_ifa(in_dev, ifap, 0);
1229 		ifa->ifa_address = sin->sin_addr.s_addr;
1230 		inet_insert_ifa(ifa);
1231 		break;
1232 
1233 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1234 
1235 		/*
1236 		 *	The mask we set must be legal.
1237 		 */
1238 		ret = -EINVAL;
1239 		if (bad_mask(sin->sin_addr.s_addr, 0))
1240 			break;
1241 		ret = 0;
1242 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1243 			__be32 old_mask = ifa->ifa_mask;
1244 			inet_del_ifa(in_dev, ifap, 0);
1245 			ifa->ifa_mask = sin->sin_addr.s_addr;
1246 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1247 
1248 			/* See if current broadcast address matches
1249 			 * with current netmask, then recalculate
1250 			 * the broadcast address. Otherwise it's a
1251 			 * funny address, so don't touch it since
1252 			 * the user seems to know what (s)he's doing...
1253 			 */
1254 			if ((dev->flags & IFF_BROADCAST) &&
1255 			    (ifa->ifa_prefixlen < 31) &&
1256 			    (ifa->ifa_broadcast ==
1257 			     (ifa->ifa_local|~old_mask))) {
1258 				ifa->ifa_broadcast = (ifa->ifa_local |
1259 						      ~sin->sin_addr.s_addr);
1260 			}
1261 			inet_insert_ifa(ifa);
1262 		}
1263 		break;
1264 	}
1265 done:
1266 	rtnl_unlock();
1267 out:
1268 	return ret;
1269 }
1270 
1271 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1272 {
1273 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1274 	const struct in_ifaddr *ifa;
1275 	struct ifreq ifr;
1276 	int done = 0;
1277 
1278 	if (WARN_ON(size > sizeof(struct ifreq)))
1279 		goto out;
1280 
1281 	if (!in_dev)
1282 		goto out;
1283 
1284 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1285 		if (!buf) {
1286 			done += size;
1287 			continue;
1288 		}
1289 		if (len < size)
1290 			break;
1291 		memset(&ifr, 0, sizeof(struct ifreq));
1292 		strcpy(ifr.ifr_name, ifa->ifa_label);
1293 
1294 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1295 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1296 								ifa->ifa_local;
1297 
1298 		if (copy_to_user(buf + done, &ifr, size)) {
1299 			done = -EFAULT;
1300 			break;
1301 		}
1302 		len  -= size;
1303 		done += size;
1304 	}
1305 out:
1306 	return done;
1307 }
1308 
1309 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1310 				 int scope)
1311 {
1312 	const struct in_ifaddr *ifa;
1313 
1314 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1315 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1316 			continue;
1317 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1318 		    ifa->ifa_scope <= scope)
1319 			return ifa->ifa_local;
1320 	}
1321 
1322 	return 0;
1323 }
1324 
1325 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1326 {
1327 	const struct in_ifaddr *ifa;
1328 	__be32 addr = 0;
1329 	unsigned char localnet_scope = RT_SCOPE_HOST;
1330 	struct in_device *in_dev;
1331 	struct net *net = dev_net(dev);
1332 	int master_idx;
1333 
1334 	rcu_read_lock();
1335 	in_dev = __in_dev_get_rcu(dev);
1336 	if (!in_dev)
1337 		goto no_in_dev;
1338 
1339 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1340 		localnet_scope = RT_SCOPE_LINK;
1341 
1342 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1343 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1344 			continue;
1345 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1346 			continue;
1347 		if (!dst || inet_ifa_match(dst, ifa)) {
1348 			addr = ifa->ifa_local;
1349 			break;
1350 		}
1351 		if (!addr)
1352 			addr = ifa->ifa_local;
1353 	}
1354 
1355 	if (addr)
1356 		goto out_unlock;
1357 no_in_dev:
1358 	master_idx = l3mdev_master_ifindex_rcu(dev);
1359 
1360 	/* For VRFs, the VRF device takes the place of the loopback device,
1361 	 * with addresses on it being preferred.  Note in such cases the
1362 	 * loopback device will be among the devices that fail the master_idx
1363 	 * equality check in the loop below.
1364 	 */
1365 	if (master_idx &&
1366 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1367 	    (in_dev = __in_dev_get_rcu(dev))) {
1368 		addr = in_dev_select_addr(in_dev, scope);
1369 		if (addr)
1370 			goto out_unlock;
1371 	}
1372 
1373 	/* Not loopback addresses on loopback should be preferred
1374 	   in this case. It is important that lo is the first interface
1375 	   in dev_base list.
1376 	 */
1377 	for_each_netdev_rcu(net, dev) {
1378 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1379 			continue;
1380 
1381 		in_dev = __in_dev_get_rcu(dev);
1382 		if (!in_dev)
1383 			continue;
1384 
1385 		addr = in_dev_select_addr(in_dev, scope);
1386 		if (addr)
1387 			goto out_unlock;
1388 	}
1389 out_unlock:
1390 	rcu_read_unlock();
1391 	return addr;
1392 }
1393 EXPORT_SYMBOL(inet_select_addr);
1394 
1395 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1396 			      __be32 local, int scope)
1397 {
1398 	unsigned char localnet_scope = RT_SCOPE_HOST;
1399 	const struct in_ifaddr *ifa;
1400 	__be32 addr = 0;
1401 	int same = 0;
1402 
1403 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1404 		localnet_scope = RT_SCOPE_LINK;
1405 
1406 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1407 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1408 
1409 		if (!addr &&
1410 		    (local == ifa->ifa_local || !local) &&
1411 		    min_scope <= scope) {
1412 			addr = ifa->ifa_local;
1413 			if (same)
1414 				break;
1415 		}
1416 		if (!same) {
1417 			same = (!local || inet_ifa_match(local, ifa)) &&
1418 				(!dst || inet_ifa_match(dst, ifa));
1419 			if (same && addr) {
1420 				if (local || !dst)
1421 					break;
1422 				/* Is the selected addr into dst subnet? */
1423 				if (inet_ifa_match(addr, ifa))
1424 					break;
1425 				/* No, then can we use new local src? */
1426 				if (min_scope <= scope) {
1427 					addr = ifa->ifa_local;
1428 					break;
1429 				}
1430 				/* search for large dst subnet for addr */
1431 				same = 0;
1432 			}
1433 		}
1434 	}
1435 
1436 	return same ? addr : 0;
1437 }
1438 
1439 /*
1440  * Confirm that local IP address exists using wildcards:
1441  * - net: netns to check, cannot be NULL
1442  * - in_dev: only on this interface, NULL=any interface
1443  * - dst: only in the same subnet as dst, 0=any dst
1444  * - local: address, 0=autoselect the local address
1445  * - scope: maximum allowed scope value for the local address
1446  */
1447 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1448 			 __be32 dst, __be32 local, int scope)
1449 {
1450 	__be32 addr = 0;
1451 	struct net_device *dev;
1452 
1453 	if (in_dev)
1454 		return confirm_addr_indev(in_dev, dst, local, scope);
1455 
1456 	rcu_read_lock();
1457 	for_each_netdev_rcu(net, dev) {
1458 		in_dev = __in_dev_get_rcu(dev);
1459 		if (in_dev) {
1460 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1461 			if (addr)
1462 				break;
1463 		}
1464 	}
1465 	rcu_read_unlock();
1466 
1467 	return addr;
1468 }
1469 EXPORT_SYMBOL(inet_confirm_addr);
1470 
1471 /*
1472  *	Device notifier
1473  */
1474 
1475 int register_inetaddr_notifier(struct notifier_block *nb)
1476 {
1477 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1478 }
1479 EXPORT_SYMBOL(register_inetaddr_notifier);
1480 
1481 int unregister_inetaddr_notifier(struct notifier_block *nb)
1482 {
1483 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1484 }
1485 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1486 
1487 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1488 {
1489 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1490 }
1491 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1492 
1493 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1494 {
1495 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1496 	    nb);
1497 }
1498 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1499 
1500 /* Rename ifa_labels for a device name change. Make some effort to preserve
1501  * existing alias numbering and to create unique labels if possible.
1502 */
1503 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1504 {
1505 	struct in_ifaddr *ifa;
1506 	int named = 0;
1507 
1508 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1509 		char old[IFNAMSIZ], *dot;
1510 
1511 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1512 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1513 		if (named++ == 0)
1514 			goto skip;
1515 		dot = strchr(old, ':');
1516 		if (!dot) {
1517 			sprintf(old, ":%d", named);
1518 			dot = old;
1519 		}
1520 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1521 			strcat(ifa->ifa_label, dot);
1522 		else
1523 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1524 skip:
1525 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1526 	}
1527 }
1528 
1529 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1530 					struct in_device *in_dev)
1531 
1532 {
1533 	const struct in_ifaddr *ifa;
1534 
1535 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1536 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1537 			 ifa->ifa_local, dev,
1538 			 ifa->ifa_local, NULL,
1539 			 dev->dev_addr, NULL);
1540 	}
1541 }
1542 
1543 /* Called only under RTNL semaphore */
1544 
1545 static int inetdev_event(struct notifier_block *this, unsigned long event,
1546 			 void *ptr)
1547 {
1548 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1549 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1550 
1551 	ASSERT_RTNL();
1552 
1553 	if (!in_dev) {
1554 		if (event == NETDEV_REGISTER) {
1555 			in_dev = inetdev_init(dev);
1556 			if (IS_ERR(in_dev))
1557 				return notifier_from_errno(PTR_ERR(in_dev));
1558 			if (dev->flags & IFF_LOOPBACK) {
1559 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1560 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1561 			}
1562 		} else if (event == NETDEV_CHANGEMTU) {
1563 			/* Re-enabling IP */
1564 			if (inetdev_valid_mtu(dev->mtu))
1565 				in_dev = inetdev_init(dev);
1566 		}
1567 		goto out;
1568 	}
1569 
1570 	switch (event) {
1571 	case NETDEV_REGISTER:
1572 		pr_debug("%s: bug\n", __func__);
1573 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1574 		break;
1575 	case NETDEV_UP:
1576 		if (!inetdev_valid_mtu(dev->mtu))
1577 			break;
1578 		if (dev->flags & IFF_LOOPBACK) {
1579 			struct in_ifaddr *ifa = inet_alloc_ifa();
1580 
1581 			if (ifa) {
1582 				INIT_HLIST_NODE(&ifa->hash);
1583 				ifa->ifa_local =
1584 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1585 				ifa->ifa_prefixlen = 8;
1586 				ifa->ifa_mask = inet_make_mask(8);
1587 				in_dev_hold(in_dev);
1588 				ifa->ifa_dev = in_dev;
1589 				ifa->ifa_scope = RT_SCOPE_HOST;
1590 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1591 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1592 						 INFINITY_LIFE_TIME);
1593 				ipv4_devconf_setall(in_dev);
1594 				neigh_parms_data_state_setall(in_dev->arp_parms);
1595 				inet_insert_ifa(ifa);
1596 			}
1597 		}
1598 		ip_mc_up(in_dev);
1599 		fallthrough;
1600 	case NETDEV_CHANGEADDR:
1601 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1602 			break;
1603 		fallthrough;
1604 	case NETDEV_NOTIFY_PEERS:
1605 		/* Send gratuitous ARP to notify of link change */
1606 		inetdev_send_gratuitous_arp(dev, in_dev);
1607 		break;
1608 	case NETDEV_DOWN:
1609 		ip_mc_down(in_dev);
1610 		break;
1611 	case NETDEV_PRE_TYPE_CHANGE:
1612 		ip_mc_unmap(in_dev);
1613 		break;
1614 	case NETDEV_POST_TYPE_CHANGE:
1615 		ip_mc_remap(in_dev);
1616 		break;
1617 	case NETDEV_CHANGEMTU:
1618 		if (inetdev_valid_mtu(dev->mtu))
1619 			break;
1620 		/* disable IP when MTU is not enough */
1621 		fallthrough;
1622 	case NETDEV_UNREGISTER:
1623 		inetdev_destroy(in_dev);
1624 		break;
1625 	case NETDEV_CHANGENAME:
1626 		/* Do not notify about label change, this event is
1627 		 * not interesting to applications using netlink.
1628 		 */
1629 		inetdev_changename(dev, in_dev);
1630 
1631 		devinet_sysctl_unregister(in_dev);
1632 		devinet_sysctl_register(in_dev);
1633 		break;
1634 	}
1635 out:
1636 	return NOTIFY_DONE;
1637 }
1638 
1639 static struct notifier_block ip_netdev_notifier = {
1640 	.notifier_call = inetdev_event,
1641 };
1642 
1643 static size_t inet_nlmsg_size(void)
1644 {
1645 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1646 	       + nla_total_size(4) /* IFA_ADDRESS */
1647 	       + nla_total_size(4) /* IFA_LOCAL */
1648 	       + nla_total_size(4) /* IFA_BROADCAST */
1649 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1650 	       + nla_total_size(4)  /* IFA_FLAGS */
1651 	       + nla_total_size(1)  /* IFA_PROTO */
1652 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1653 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1654 }
1655 
1656 static inline u32 cstamp_delta(unsigned long cstamp)
1657 {
1658 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1659 }
1660 
1661 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1662 			 unsigned long tstamp, u32 preferred, u32 valid)
1663 {
1664 	struct ifa_cacheinfo ci;
1665 
1666 	ci.cstamp = cstamp_delta(cstamp);
1667 	ci.tstamp = cstamp_delta(tstamp);
1668 	ci.ifa_prefered = preferred;
1669 	ci.ifa_valid = valid;
1670 
1671 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1672 }
1673 
1674 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1675 			    struct inet_fill_args *args)
1676 {
1677 	struct ifaddrmsg *ifm;
1678 	struct nlmsghdr  *nlh;
1679 	u32 preferred, valid;
1680 
1681 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1682 			args->flags);
1683 	if (!nlh)
1684 		return -EMSGSIZE;
1685 
1686 	ifm = nlmsg_data(nlh);
1687 	ifm->ifa_family = AF_INET;
1688 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1689 	ifm->ifa_flags = ifa->ifa_flags;
1690 	ifm->ifa_scope = ifa->ifa_scope;
1691 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1692 
1693 	if (args->netnsid >= 0 &&
1694 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1695 		goto nla_put_failure;
1696 
1697 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1698 		preferred = ifa->ifa_preferred_lft;
1699 		valid = ifa->ifa_valid_lft;
1700 		if (preferred != INFINITY_LIFE_TIME) {
1701 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1702 
1703 			if (preferred > tval)
1704 				preferred -= tval;
1705 			else
1706 				preferred = 0;
1707 			if (valid != INFINITY_LIFE_TIME) {
1708 				if (valid > tval)
1709 					valid -= tval;
1710 				else
1711 					valid = 0;
1712 			}
1713 		}
1714 	} else {
1715 		preferred = INFINITY_LIFE_TIME;
1716 		valid = INFINITY_LIFE_TIME;
1717 	}
1718 	if ((ifa->ifa_address &&
1719 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1720 	    (ifa->ifa_local &&
1721 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1722 	    (ifa->ifa_broadcast &&
1723 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1724 	    (ifa->ifa_label[0] &&
1725 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1726 	    (ifa->ifa_proto &&
1727 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1728 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1729 	    (ifa->ifa_rt_priority &&
1730 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1731 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1732 			  preferred, valid))
1733 		goto nla_put_failure;
1734 
1735 	nlmsg_end(skb, nlh);
1736 	return 0;
1737 
1738 nla_put_failure:
1739 	nlmsg_cancel(skb, nlh);
1740 	return -EMSGSIZE;
1741 }
1742 
1743 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1744 				      struct inet_fill_args *fillargs,
1745 				      struct net **tgt_net, struct sock *sk,
1746 				      struct netlink_callback *cb)
1747 {
1748 	struct netlink_ext_ack *extack = cb->extack;
1749 	struct nlattr *tb[IFA_MAX+1];
1750 	struct ifaddrmsg *ifm;
1751 	int err, i;
1752 
1753 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1754 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1755 		return -EINVAL;
1756 	}
1757 
1758 	ifm = nlmsg_data(nlh);
1759 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1760 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1761 		return -EINVAL;
1762 	}
1763 
1764 	fillargs->ifindex = ifm->ifa_index;
1765 	if (fillargs->ifindex) {
1766 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1767 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1768 	}
1769 
1770 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1771 					    ifa_ipv4_policy, extack);
1772 	if (err < 0)
1773 		return err;
1774 
1775 	for (i = 0; i <= IFA_MAX; ++i) {
1776 		if (!tb[i])
1777 			continue;
1778 
1779 		if (i == IFA_TARGET_NETNSID) {
1780 			struct net *net;
1781 
1782 			fillargs->netnsid = nla_get_s32(tb[i]);
1783 
1784 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1785 			if (IS_ERR(net)) {
1786 				fillargs->netnsid = -1;
1787 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1788 				return PTR_ERR(net);
1789 			}
1790 			*tgt_net = net;
1791 		} else {
1792 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1793 			return -EINVAL;
1794 		}
1795 	}
1796 
1797 	return 0;
1798 }
1799 
1800 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1801 			    struct netlink_callback *cb, int s_ip_idx,
1802 			    struct inet_fill_args *fillargs)
1803 {
1804 	struct in_ifaddr *ifa;
1805 	int ip_idx = 0;
1806 	int err;
1807 
1808 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1809 		if (ip_idx < s_ip_idx) {
1810 			ip_idx++;
1811 			continue;
1812 		}
1813 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1814 		if (err < 0)
1815 			goto done;
1816 
1817 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1818 		ip_idx++;
1819 	}
1820 	err = 0;
1821 
1822 done:
1823 	cb->args[2] = ip_idx;
1824 
1825 	return err;
1826 }
1827 
1828 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1829  */
1830 static u32 inet_base_seq(const struct net *net)
1831 {
1832 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1833 		  net->dev_base_seq;
1834 
1835 	/* Must not return 0 (see nl_dump_check_consistent()).
1836 	 * Chose a value far away from 0.
1837 	 */
1838 	if (!res)
1839 		res = 0x80000000;
1840 	return res;
1841 }
1842 
1843 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1844 {
1845 	const struct nlmsghdr *nlh = cb->nlh;
1846 	struct inet_fill_args fillargs = {
1847 		.portid = NETLINK_CB(cb->skb).portid,
1848 		.seq = nlh->nlmsg_seq,
1849 		.event = RTM_NEWADDR,
1850 		.flags = NLM_F_MULTI,
1851 		.netnsid = -1,
1852 	};
1853 	struct net *net = sock_net(skb->sk);
1854 	struct net *tgt_net = net;
1855 	int h, s_h;
1856 	int idx, s_idx;
1857 	int s_ip_idx;
1858 	struct net_device *dev;
1859 	struct in_device *in_dev;
1860 	struct hlist_head *head;
1861 	int err = 0;
1862 
1863 	s_h = cb->args[0];
1864 	s_idx = idx = cb->args[1];
1865 	s_ip_idx = cb->args[2];
1866 
1867 	if (cb->strict_check) {
1868 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1869 						 skb->sk, cb);
1870 		if (err < 0)
1871 			goto put_tgt_net;
1872 
1873 		err = 0;
1874 		if (fillargs.ifindex) {
1875 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1876 			if (!dev) {
1877 				err = -ENODEV;
1878 				goto put_tgt_net;
1879 			}
1880 
1881 			in_dev = __in_dev_get_rtnl(dev);
1882 			if (in_dev) {
1883 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1884 						       &fillargs);
1885 			}
1886 			goto put_tgt_net;
1887 		}
1888 	}
1889 
1890 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1891 		idx = 0;
1892 		head = &tgt_net->dev_index_head[h];
1893 		rcu_read_lock();
1894 		cb->seq = inet_base_seq(tgt_net);
1895 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1896 			if (idx < s_idx)
1897 				goto cont;
1898 			if (h > s_h || idx > s_idx)
1899 				s_ip_idx = 0;
1900 			in_dev = __in_dev_get_rcu(dev);
1901 			if (!in_dev)
1902 				goto cont;
1903 
1904 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1905 					       &fillargs);
1906 			if (err < 0) {
1907 				rcu_read_unlock();
1908 				goto done;
1909 			}
1910 cont:
1911 			idx++;
1912 		}
1913 		rcu_read_unlock();
1914 	}
1915 
1916 done:
1917 	cb->args[0] = h;
1918 	cb->args[1] = idx;
1919 put_tgt_net:
1920 	if (fillargs.netnsid >= 0)
1921 		put_net(tgt_net);
1922 
1923 	return skb->len ? : err;
1924 }
1925 
1926 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1927 		      u32 portid)
1928 {
1929 	struct inet_fill_args fillargs = {
1930 		.portid = portid,
1931 		.seq = nlh ? nlh->nlmsg_seq : 0,
1932 		.event = event,
1933 		.flags = 0,
1934 		.netnsid = -1,
1935 	};
1936 	struct sk_buff *skb;
1937 	int err = -ENOBUFS;
1938 	struct net *net;
1939 
1940 	net = dev_net(ifa->ifa_dev->dev);
1941 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1942 	if (!skb)
1943 		goto errout;
1944 
1945 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1946 	if (err < 0) {
1947 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1948 		WARN_ON(err == -EMSGSIZE);
1949 		kfree_skb(skb);
1950 		goto errout;
1951 	}
1952 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1953 	return;
1954 errout:
1955 	if (err < 0)
1956 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1957 }
1958 
1959 static size_t inet_get_link_af_size(const struct net_device *dev,
1960 				    u32 ext_filter_mask)
1961 {
1962 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1963 
1964 	if (!in_dev)
1965 		return 0;
1966 
1967 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1968 }
1969 
1970 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1971 			     u32 ext_filter_mask)
1972 {
1973 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1974 	struct nlattr *nla;
1975 	int i;
1976 
1977 	if (!in_dev)
1978 		return -ENODATA;
1979 
1980 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1981 	if (!nla)
1982 		return -EMSGSIZE;
1983 
1984 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1985 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1986 
1987 	return 0;
1988 }
1989 
1990 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1991 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1992 };
1993 
1994 static int inet_validate_link_af(const struct net_device *dev,
1995 				 const struct nlattr *nla,
1996 				 struct netlink_ext_ack *extack)
1997 {
1998 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1999 	int err, rem;
2000 
2001 	if (dev && !__in_dev_get_rtnl(dev))
2002 		return -EAFNOSUPPORT;
2003 
2004 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2005 					  inet_af_policy, extack);
2006 	if (err < 0)
2007 		return err;
2008 
2009 	if (tb[IFLA_INET_CONF]) {
2010 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2011 			int cfgid = nla_type(a);
2012 
2013 			if (nla_len(a) < 4)
2014 				return -EINVAL;
2015 
2016 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2017 				return -EINVAL;
2018 		}
2019 	}
2020 
2021 	return 0;
2022 }
2023 
2024 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2025 			    struct netlink_ext_ack *extack)
2026 {
2027 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2028 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2029 	int rem;
2030 
2031 	if (!in_dev)
2032 		return -EAFNOSUPPORT;
2033 
2034 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2035 		return -EINVAL;
2036 
2037 	if (tb[IFLA_INET_CONF]) {
2038 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2039 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2040 	}
2041 
2042 	return 0;
2043 }
2044 
2045 static int inet_netconf_msgsize_devconf(int type)
2046 {
2047 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2048 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2049 	bool all = false;
2050 
2051 	if (type == NETCONFA_ALL)
2052 		all = true;
2053 
2054 	if (all || type == NETCONFA_FORWARDING)
2055 		size += nla_total_size(4);
2056 	if (all || type == NETCONFA_RP_FILTER)
2057 		size += nla_total_size(4);
2058 	if (all || type == NETCONFA_MC_FORWARDING)
2059 		size += nla_total_size(4);
2060 	if (all || type == NETCONFA_BC_FORWARDING)
2061 		size += nla_total_size(4);
2062 	if (all || type == NETCONFA_PROXY_NEIGH)
2063 		size += nla_total_size(4);
2064 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2065 		size += nla_total_size(4);
2066 
2067 	return size;
2068 }
2069 
2070 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2071 				     struct ipv4_devconf *devconf, u32 portid,
2072 				     u32 seq, int event, unsigned int flags,
2073 				     int type)
2074 {
2075 	struct nlmsghdr  *nlh;
2076 	struct netconfmsg *ncm;
2077 	bool all = false;
2078 
2079 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2080 			flags);
2081 	if (!nlh)
2082 		return -EMSGSIZE;
2083 
2084 	if (type == NETCONFA_ALL)
2085 		all = true;
2086 
2087 	ncm = nlmsg_data(nlh);
2088 	ncm->ncm_family = AF_INET;
2089 
2090 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2091 		goto nla_put_failure;
2092 
2093 	if (!devconf)
2094 		goto out;
2095 
2096 	if ((all || type == NETCONFA_FORWARDING) &&
2097 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2098 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2099 		goto nla_put_failure;
2100 	if ((all || type == NETCONFA_RP_FILTER) &&
2101 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2102 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2103 		goto nla_put_failure;
2104 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2105 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2106 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2107 		goto nla_put_failure;
2108 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2109 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2110 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2111 		goto nla_put_failure;
2112 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2113 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2114 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2115 		goto nla_put_failure;
2116 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2117 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2118 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2119 		goto nla_put_failure;
2120 
2121 out:
2122 	nlmsg_end(skb, nlh);
2123 	return 0;
2124 
2125 nla_put_failure:
2126 	nlmsg_cancel(skb, nlh);
2127 	return -EMSGSIZE;
2128 }
2129 
2130 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2131 				 int ifindex, struct ipv4_devconf *devconf)
2132 {
2133 	struct sk_buff *skb;
2134 	int err = -ENOBUFS;
2135 
2136 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2137 	if (!skb)
2138 		goto errout;
2139 
2140 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2141 					event, 0, type);
2142 	if (err < 0) {
2143 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2144 		WARN_ON(err == -EMSGSIZE);
2145 		kfree_skb(skb);
2146 		goto errout;
2147 	}
2148 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2149 	return;
2150 errout:
2151 	if (err < 0)
2152 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2153 }
2154 
2155 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2156 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2157 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2158 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2159 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2160 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2161 };
2162 
2163 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2164 				      const struct nlmsghdr *nlh,
2165 				      struct nlattr **tb,
2166 				      struct netlink_ext_ack *extack)
2167 {
2168 	int i, err;
2169 
2170 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2171 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2172 		return -EINVAL;
2173 	}
2174 
2175 	if (!netlink_strict_get_check(skb))
2176 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2177 					      tb, NETCONFA_MAX,
2178 					      devconf_ipv4_policy, extack);
2179 
2180 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2181 					    tb, NETCONFA_MAX,
2182 					    devconf_ipv4_policy, extack);
2183 	if (err)
2184 		return err;
2185 
2186 	for (i = 0; i <= NETCONFA_MAX; i++) {
2187 		if (!tb[i])
2188 			continue;
2189 
2190 		switch (i) {
2191 		case NETCONFA_IFINDEX:
2192 			break;
2193 		default:
2194 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2195 			return -EINVAL;
2196 		}
2197 	}
2198 
2199 	return 0;
2200 }
2201 
2202 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2203 				    struct nlmsghdr *nlh,
2204 				    struct netlink_ext_ack *extack)
2205 {
2206 	struct net *net = sock_net(in_skb->sk);
2207 	struct nlattr *tb[NETCONFA_MAX+1];
2208 	struct sk_buff *skb;
2209 	struct ipv4_devconf *devconf;
2210 	struct in_device *in_dev;
2211 	struct net_device *dev;
2212 	int ifindex;
2213 	int err;
2214 
2215 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2216 	if (err)
2217 		goto errout;
2218 
2219 	err = -EINVAL;
2220 	if (!tb[NETCONFA_IFINDEX])
2221 		goto errout;
2222 
2223 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2224 	switch (ifindex) {
2225 	case NETCONFA_IFINDEX_ALL:
2226 		devconf = net->ipv4.devconf_all;
2227 		break;
2228 	case NETCONFA_IFINDEX_DEFAULT:
2229 		devconf = net->ipv4.devconf_dflt;
2230 		break;
2231 	default:
2232 		dev = __dev_get_by_index(net, ifindex);
2233 		if (!dev)
2234 			goto errout;
2235 		in_dev = __in_dev_get_rtnl(dev);
2236 		if (!in_dev)
2237 			goto errout;
2238 		devconf = &in_dev->cnf;
2239 		break;
2240 	}
2241 
2242 	err = -ENOBUFS;
2243 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2244 	if (!skb)
2245 		goto errout;
2246 
2247 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2248 					NETLINK_CB(in_skb).portid,
2249 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2250 					NETCONFA_ALL);
2251 	if (err < 0) {
2252 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2253 		WARN_ON(err == -EMSGSIZE);
2254 		kfree_skb(skb);
2255 		goto errout;
2256 	}
2257 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2258 errout:
2259 	return err;
2260 }
2261 
2262 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2263 				     struct netlink_callback *cb)
2264 {
2265 	const struct nlmsghdr *nlh = cb->nlh;
2266 	struct net *net = sock_net(skb->sk);
2267 	int h, s_h;
2268 	int idx, s_idx;
2269 	struct net_device *dev;
2270 	struct in_device *in_dev;
2271 	struct hlist_head *head;
2272 
2273 	if (cb->strict_check) {
2274 		struct netlink_ext_ack *extack = cb->extack;
2275 		struct netconfmsg *ncm;
2276 
2277 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2278 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2279 			return -EINVAL;
2280 		}
2281 
2282 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2283 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2284 			return -EINVAL;
2285 		}
2286 	}
2287 
2288 	s_h = cb->args[0];
2289 	s_idx = idx = cb->args[1];
2290 
2291 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2292 		idx = 0;
2293 		head = &net->dev_index_head[h];
2294 		rcu_read_lock();
2295 		cb->seq = inet_base_seq(net);
2296 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2297 			if (idx < s_idx)
2298 				goto cont;
2299 			in_dev = __in_dev_get_rcu(dev);
2300 			if (!in_dev)
2301 				goto cont;
2302 
2303 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2304 						      &in_dev->cnf,
2305 						      NETLINK_CB(cb->skb).portid,
2306 						      nlh->nlmsg_seq,
2307 						      RTM_NEWNETCONF,
2308 						      NLM_F_MULTI,
2309 						      NETCONFA_ALL) < 0) {
2310 				rcu_read_unlock();
2311 				goto done;
2312 			}
2313 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2314 cont:
2315 			idx++;
2316 		}
2317 		rcu_read_unlock();
2318 	}
2319 	if (h == NETDEV_HASHENTRIES) {
2320 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2321 					      net->ipv4.devconf_all,
2322 					      NETLINK_CB(cb->skb).portid,
2323 					      nlh->nlmsg_seq,
2324 					      RTM_NEWNETCONF, NLM_F_MULTI,
2325 					      NETCONFA_ALL) < 0)
2326 			goto done;
2327 		else
2328 			h++;
2329 	}
2330 	if (h == NETDEV_HASHENTRIES + 1) {
2331 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2332 					      net->ipv4.devconf_dflt,
2333 					      NETLINK_CB(cb->skb).portid,
2334 					      nlh->nlmsg_seq,
2335 					      RTM_NEWNETCONF, NLM_F_MULTI,
2336 					      NETCONFA_ALL) < 0)
2337 			goto done;
2338 		else
2339 			h++;
2340 	}
2341 done:
2342 	cb->args[0] = h;
2343 	cb->args[1] = idx;
2344 
2345 	return skb->len;
2346 }
2347 
2348 #ifdef CONFIG_SYSCTL
2349 
2350 static void devinet_copy_dflt_conf(struct net *net, int i)
2351 {
2352 	struct net_device *dev;
2353 
2354 	rcu_read_lock();
2355 	for_each_netdev_rcu(net, dev) {
2356 		struct in_device *in_dev;
2357 
2358 		in_dev = __in_dev_get_rcu(dev);
2359 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2360 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2361 	}
2362 	rcu_read_unlock();
2363 }
2364 
2365 /* called with RTNL locked */
2366 static void inet_forward_change(struct net *net)
2367 {
2368 	struct net_device *dev;
2369 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2370 
2371 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2372 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2373 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2374 				    NETCONFA_FORWARDING,
2375 				    NETCONFA_IFINDEX_ALL,
2376 				    net->ipv4.devconf_all);
2377 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2378 				    NETCONFA_FORWARDING,
2379 				    NETCONFA_IFINDEX_DEFAULT,
2380 				    net->ipv4.devconf_dflt);
2381 
2382 	for_each_netdev(net, dev) {
2383 		struct in_device *in_dev;
2384 
2385 		if (on)
2386 			dev_disable_lro(dev);
2387 
2388 		in_dev = __in_dev_get_rtnl(dev);
2389 		if (in_dev) {
2390 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2391 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2392 						    NETCONFA_FORWARDING,
2393 						    dev->ifindex, &in_dev->cnf);
2394 		}
2395 	}
2396 }
2397 
2398 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2399 {
2400 	if (cnf == net->ipv4.devconf_dflt)
2401 		return NETCONFA_IFINDEX_DEFAULT;
2402 	else if (cnf == net->ipv4.devconf_all)
2403 		return NETCONFA_IFINDEX_ALL;
2404 	else {
2405 		struct in_device *idev
2406 			= container_of(cnf, struct in_device, cnf);
2407 		return idev->dev->ifindex;
2408 	}
2409 }
2410 
2411 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2412 			     void *buffer, size_t *lenp, loff_t *ppos)
2413 {
2414 	int old_value = *(int *)ctl->data;
2415 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2416 	int new_value = *(int *)ctl->data;
2417 
2418 	if (write) {
2419 		struct ipv4_devconf *cnf = ctl->extra1;
2420 		struct net *net = ctl->extra2;
2421 		int i = (int *)ctl->data - cnf->data;
2422 		int ifindex;
2423 
2424 		set_bit(i, cnf->state);
2425 
2426 		if (cnf == net->ipv4.devconf_dflt)
2427 			devinet_copy_dflt_conf(net, i);
2428 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2429 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2430 			if ((new_value == 0) && (old_value != 0))
2431 				rt_cache_flush(net);
2432 
2433 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2434 		    new_value != old_value)
2435 			rt_cache_flush(net);
2436 
2437 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2438 		    new_value != old_value) {
2439 			ifindex = devinet_conf_ifindex(net, cnf);
2440 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2441 						    NETCONFA_RP_FILTER,
2442 						    ifindex, cnf);
2443 		}
2444 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2445 		    new_value != old_value) {
2446 			ifindex = devinet_conf_ifindex(net, cnf);
2447 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2448 						    NETCONFA_PROXY_NEIGH,
2449 						    ifindex, cnf);
2450 		}
2451 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2452 		    new_value != old_value) {
2453 			ifindex = devinet_conf_ifindex(net, cnf);
2454 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2455 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2456 						    ifindex, cnf);
2457 		}
2458 	}
2459 
2460 	return ret;
2461 }
2462 
2463 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2464 				  void *buffer, size_t *lenp, loff_t *ppos)
2465 {
2466 	int *valp = ctl->data;
2467 	int val = *valp;
2468 	loff_t pos = *ppos;
2469 	struct net *net = ctl->extra2;
2470 	int ret;
2471 
2472 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2473 		return -EPERM;
2474 
2475 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2476 
2477 	if (write && *valp != val) {
2478 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2479 			if (!rtnl_trylock()) {
2480 				/* Restore the original values before restarting */
2481 				*valp = val;
2482 				*ppos = pos;
2483 				return restart_syscall();
2484 			}
2485 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2486 				inet_forward_change(net);
2487 			} else {
2488 				struct ipv4_devconf *cnf = ctl->extra1;
2489 				struct in_device *idev =
2490 					container_of(cnf, struct in_device, cnf);
2491 				if (*valp)
2492 					dev_disable_lro(idev->dev);
2493 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2494 							    NETCONFA_FORWARDING,
2495 							    idev->dev->ifindex,
2496 							    cnf);
2497 			}
2498 			rtnl_unlock();
2499 			rt_cache_flush(net);
2500 		} else
2501 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2502 						    NETCONFA_FORWARDING,
2503 						    NETCONFA_IFINDEX_DEFAULT,
2504 						    net->ipv4.devconf_dflt);
2505 	}
2506 
2507 	return ret;
2508 }
2509 
2510 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2511 				void *buffer, size_t *lenp, loff_t *ppos)
2512 {
2513 	int *valp = ctl->data;
2514 	int val = *valp;
2515 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2516 	struct net *net = ctl->extra2;
2517 
2518 	if (write && *valp != val)
2519 		rt_cache_flush(net);
2520 
2521 	return ret;
2522 }
2523 
/*
 * Builds one struct ctl_table initialiser for the devinet sysctl template.
 *   attr: IPV4_DEVCONF_ suffix; .data points at data[IPV4_DEVCONF_<attr> - 1]
 *         of the global ipv4_devconf
 *   name: procfs file name
 *   mval: file mode
 *   proc: proc_handler
 * .data and .extra1 reference the global ipv4_devconf here;
 * __devinet_sysctl_register() rebases them onto the block being registered.
 */
2524 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2525 	{ \
2526 		.procname	= name, \
2527 		.data		= ipv4_devconf.data + \
2528 				  IPV4_DEVCONF_ ## attr - 1, \
2529 		.maxlen		= sizeof(int), \
2530 		.mode		= mval, \
2531 		.proc_handler	= proc, \
2532 		.extra1		= &ipv4_devconf, \
2533 	}
2534
/* Mode 0644 entry handled by devinet_conf_proc(). */
2535 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2536 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2537
/* Mode 0444 entry handled by devinet_conf_proc(). */
2538 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2539 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2540
/* Mode 0644 entry with a caller-chosen handler. */
2541 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2542 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2543
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
2544 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2545 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2546 
/*
 * Template for the sysctl directory registered for each configuration block.
 * __devinet_sysctl_register() duplicates this object with kmemdup(), rebases
 * each entry's .data/.extra1 onto the target ipv4_devconf, sets .extra2 to
 * the namespace, and stores the registration handle in sysctl_header.
 *
 * devinet_vars has __IPV4_DEVCONF_MAX slots; the registration loop patches
 * all but the last one.
 * NOTE(review): presumably the last slot stays zeroed as a terminator -
 * confirm that the number of entries below matches.
 */
2547 static struct devinet_sysctl_table {
2548 	struct ctl_table_header *sysctl_header;
2549 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2550 } devinet_sysctl = {
2551 	.devinet_vars = {
		/* forwarding has its own handler, devinet_sysctl_forward() */
2552 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2553 					     devinet_sysctl_forward),
2554 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2555 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2556
2557 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2558 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2559 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2560 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2561 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2562 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2563 					"accept_source_route"),
2564 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2565 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2566 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2567 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2568 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2569 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2570 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2571 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2572 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2573 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2574 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2575 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2576 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2577 					"arp_evict_nocarrier"),
2578 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2579 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2580 					"force_igmp_version"),
2581 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2582 					"igmpv2_unsolicited_report_interval"),
2583 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2584 					"igmpv3_unsolicited_report_interval"),
2585 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2586 					"ignore_routes_with_linkdown"),
2587 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2588 					"drop_gratuitous_arp"),
2589
		/* the entries below flush the route cache on any change */
2590 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2591 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2592 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2593 					      "promote_secondaries"),
2594 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2595 					      "route_localnet"),
2596 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2597 					      "drop_unicast_in_l2_multicast"),
2598 	},
2599 };
2600 
2601 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2602 				     int ifindex, struct ipv4_devconf *p)
2603 {
2604 	int i;
2605 	struct devinet_sysctl_table *t;
2606 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2607 
2608 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2609 	if (!t)
2610 		goto out;
2611 
2612 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2613 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2614 		t->devinet_vars[i].extra1 = p;
2615 		t->devinet_vars[i].extra2 = net;
2616 	}
2617 
2618 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2619 
2620 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2621 	if (!t->sysctl_header)
2622 		goto free;
2623 
2624 	p->sysctl = t;
2625 
2626 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2627 				    ifindex, p);
2628 	return 0;
2629 
2630 free:
2631 	kfree(t);
2632 out:
2633 	return -ENOMEM;
2634 }
2635 
2636 static void __devinet_sysctl_unregister(struct net *net,
2637 					struct ipv4_devconf *cnf, int ifindex)
2638 {
2639 	struct devinet_sysctl_table *t = cnf->sysctl;
2640 
2641 	if (t) {
2642 		cnf->sysctl = NULL;
2643 		unregister_net_sysctl_table(t->sysctl_header);
2644 		kfree(t);
2645 	}
2646 
2647 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2648 }
2649 
2650 static int devinet_sysctl_register(struct in_device *idev)
2651 {
2652 	int err;
2653 
2654 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2655 		return -EINVAL;
2656 
2657 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2658 	if (err)
2659 		return err;
2660 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2661 					idev->dev->ifindex, &idev->cnf);
2662 	if (err)
2663 		neigh_sysctl_unregister(idev->arp_parms);
2664 	return err;
2665 }
2666 
2667 static void devinet_sysctl_unregister(struct in_device *idev)
2668 {
2669 	struct net *net = dev_net(idev->dev);
2670 
2671 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2672 	neigh_sysctl_unregister(idev->arp_parms);
2673 }
2674 
/*
 * Template for the "ip_forward" sysctl. It refers to the FORWARDING slot of
 * the global ipv4_devconf and to init_net; devinet_init_net() duplicates it
 * for each namespace and repoints .data, .extra1 and .extra2 at that
 * namespace's "all" block before registering it under "net/ipv4".
 */
2675 static struct ctl_table ctl_forward_entry[] = {
2676 	{
2677 		.procname	= "ip_forward",
2678 		.data		= &ipv4_devconf.data[
2679 					IPV4_DEVCONF_FORWARDING - 1],
2680 		.maxlen		= sizeof(int),
2681 		.mode		= 0644,
2682 		.proc_handler	= devinet_sysctl_forward,
2683 		.extra1		= &ipv4_devconf,
2684 		.extra2		= &init_net,
2685 	},
2686 	{ },
2687 };
2688 #endif
2689 
/*
 * devinet_init_net - per-namespace initialisation of the IPv4 devconf state
 * @net: network namespace being set up
 *
 * Allocates the namespace's "all" and "default" configuration blocks as
 * copies of the compiled-in ipv4_devconf / ipv4_devconf_dflt. For namespaces
 * other than init_net the values may then be overwritten according to
 * net_inherit_devconf(). With CONFIG_SYSCTL the "all" and "default" conf
 * directories and the ip_forward entry are registered as well.
 *
 * Returns 0 on success. On failure everything set up so far is undone and a
 * negative error is returned: -ENOMEM for the allocation failures and the
 * ip_forward registration failure, otherwise the error from
 * __devinet_sysctl_register().
 */
2690 static __net_init int devinet_init_net(struct net *net)
2691 {
2692 	int err;
2693 	struct ipv4_devconf *all, *dflt;
2694 #ifdef CONFIG_SYSCTL
2695 	struct ctl_table *tbl;
2696 	struct ctl_table_header *forw_hdr;
2697 #endif
2698
	/* err stays -ENOMEM for all of the allocations below */
2699 	err = -ENOMEM;
2700 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2701 	if (!all)
2702 		goto err_alloc_all;
2703
2704 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2705 	if (!dflt)
2706 		goto err_alloc_dflt;
2707
2708 #ifdef CONFIG_SYSCTL
2709 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2710 	if (!tbl)
2711 		goto err_alloc_ctl;
2712
	/* point the private ip_forward copy at this namespace's "all" block */
2713 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2714 	tbl[0].extra1 = all;
2715 	tbl[0].extra2 = net;
2716 #endif
2717
2718 	if (!net_eq(net, &init_net)) {
2719 		switch (net_inherit_devconf()) {
2720 		case 3:
2721 			/* copy from the current netns */
2722 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2723 			       sizeof(ipv4_devconf));
2724 			memcpy(dflt,
2725 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2726 			       sizeof(ipv4_devconf_dflt));
2727 			break;
2728 		case 0:
2729 		case 1:
2730 			/* copy from init_net */
2731 			memcpy(all, init_net.ipv4.devconf_all,
2732 			       sizeof(ipv4_devconf));
2733 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2734 			       sizeof(ipv4_devconf_dflt));
2735 			break;
2736 		case 2:
2737 			/* use compiled values */
2738 			break;
2739 		}
2740 	}
2741
2742 #ifdef CONFIG_SYSCTL
2743 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2744 	if (err < 0)
2745 		goto err_reg_all;
2746
2747 	err = __devinet_sysctl_register(net, "default",
2748 					NETCONFA_IFINDEX_DEFAULT, dflt);
2749 	if (err < 0)
2750 		goto err_reg_dflt;
2751
	/* the calls above overwrote err; reset it for the next failure path */
2752 	err = -ENOMEM;
2753 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2754 					  ARRAY_SIZE(ctl_forward_entry));
2755 	if (!forw_hdr)
2756 		goto err_reg_ctl;
2757 	net->ipv4.forw_hdr = forw_hdr;
2758 #endif
2759
	/* publish the blocks only after every step has succeeded */
2760 	net->ipv4.devconf_all = all;
2761 	net->ipv4.devconf_dflt = dflt;
2762 	return 0;
2763
	/* error unwinding: labels undo the steps in reverse order of setup */
2764 #ifdef CONFIG_SYSCTL
2765 err_reg_ctl:
2766 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2767 err_reg_dflt:
2768 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2769 err_reg_all:
2770 	kfree(tbl);
2771 err_alloc_ctl:
2772 #endif
2773 	kfree(dflt);
2774 err_alloc_dflt:
2775 	kfree(all);
2776 err_alloc_all:
2777 	return err;
2778 }
2779 
2780 static __net_exit void devinet_exit_net(struct net *net)
2781 {
2782 #ifdef CONFIG_SYSCTL
2783 	struct ctl_table *tbl;
2784 
2785 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2786 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2787 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2788 				    NETCONFA_IFINDEX_DEFAULT);
2789 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2790 				    NETCONFA_IFINDEX_ALL);
2791 	kfree(tbl);
2792 #endif
2793 	kfree(net->ipv4.devconf_dflt);
2794 	kfree(net->ipv4.devconf_all);
2795 }
2796 
/*
 * Per-namespace hooks: devinet_init_net() on creation, devinet_exit_net() on
 * destruction. Registered by devinet_init() via register_pernet_subsys().
 */
2797 static __net_initdata struct pernet_operations devinet_ops = {
2798 	.init = devinet_init_net,
2799 	.exit = devinet_exit_net,
2800 };
2801 
/*
 * AF_INET link address-family callbacks, registered by devinet_init() via
 * rtnl_af_register().
 */
2802 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2803 	.family		  = AF_INET,
2804 	.fill_link_af	  = inet_fill_link_af,
2805 	.get_link_af_size = inet_get_link_af_size,
2806 	.validate_link_af = inet_validate_link_af,
2807 	.set_link_af	  = inet_set_link_af,
2808 };
2809 
2810 void __init devinet_init(void)
2811 {
2812 	int i;
2813 
2814 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2815 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2816 
2817 	register_pernet_subsys(&devinet_ops);
2818 	register_netdevice_notifier(&ip_netdev_notifier);
2819 
2820 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2821 
2822 	rtnl_af_register(&inet_af_ops);
2823 
2824 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2825 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2826 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2827 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2828 		      inet_netconf_dump_devconf, 0);
2829 }
2830