xref: /openbmc/linux/net/ipv4/devinet.c (revision 6db6b729)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/*
 * Address flags that only make sense for IPv6.  __inet_insert_ifa() clears
 * them from every IPv4 address before linking it in.
 */
#define IPV6ONLY_FLAGS						\
	(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED |	\
	 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE |			\
	 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/*
 * Apply IPV4_DEVCONF() to the devconf_dflt block that hangs off @net
 * (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised so that any pointer expression may be passed: with
 * the bare "*net->..." form an argument such as "&some_net" would bind "->"
 * before "&" and fail to parse as intended.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
352 static void __inet_del_ifa(struct in_device *in_dev,
353 			   struct in_ifaddr __rcu **ifap,
354 			   int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356 	struct in_ifaddr *promote = NULL;
357 	struct in_ifaddr *ifa, *ifa1;
358 	struct in_ifaddr __rcu **last_prim;
359 	struct in_ifaddr *prev_prom = NULL;
360 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361 
362 	ASSERT_RTNL();
363 
364 	ifa1 = rtnl_dereference(*ifap);
365 	last_prim = ifap;
366 	if (in_dev->dead)
367 		goto no_promotions;
368 
369 	/* 1. Deleting primary ifaddr forces deletion all secondaries
370 	 * unless alias promotion is set
371 	 **/
372 
373 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 
376 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 			    ifa1->ifa_scope <= ifa->ifa_scope)
379 				last_prim = &ifa->ifa_next;
380 
381 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 			    ifa1->ifa_mask != ifa->ifa_mask ||
383 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 				ifap1 = &ifa->ifa_next;
385 				prev_prom = ifa;
386 				continue;
387 			}
388 
389 			if (!do_promote) {
390 				inet_hash_remove(ifa);
391 				*ifap1 = ifa->ifa_next;
392 
393 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 				blocking_notifier_call_chain(&inetaddr_chain,
395 						NETDEV_DOWN, ifa);
396 				inet_free_ifa(ifa);
397 			} else {
398 				promote = ifa;
399 				break;
400 			}
401 		}
402 	}
403 
404 	/* On promotion all secondaries from subnet are changing
405 	 * the primary IP, we must remove all their routes silently
406 	 * and later to add them back with new prefsrc. Do this
407 	 * while all addresses are on the device list.
408 	 */
409 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 		if (ifa1->ifa_mask == ifa->ifa_mask &&
411 		    inet_ifa_match(ifa1->ifa_address, ifa))
412 			fib_del_ifaddr(ifa, ifa1);
413 	}
414 
415 no_promotions:
416 	/* 2. Unlink it */
417 
418 	*ifap = ifa1->ifa_next;
419 	inet_hash_remove(ifa1);
420 
421 	/* 3. Announce address deletion */
422 
423 	/* Send message first, then call notifier.
424 	   At first sight, FIB update triggered by notifier
425 	   will refer to already deleted ifaddr, that could confuse
426 	   netlink listeners. It is not true: look, gated sees
427 	   that route deleted and if it still thinks that ifaddr
428 	   is valid, it will try to restore deleted routes... Grr.
429 	   So that, this order is correct.
430 	 */
431 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433 
434 	if (promote) {
435 		struct in_ifaddr *next_sec;
436 
437 		next_sec = rtnl_dereference(promote->ifa_next);
438 		if (prev_prom) {
439 			struct in_ifaddr *last_sec;
440 
441 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 
443 			last_sec = rtnl_dereference(*last_prim);
444 			rcu_assign_pointer(promote->ifa_next, last_sec);
445 			rcu_assign_pointer(*last_prim, promote);
446 		}
447 
448 		promote->ifa_flags &= ~IFA_F_SECONDARY;
449 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 		blocking_notifier_call_chain(&inetaddr_chain,
451 				NETDEV_UP, promote);
452 		for (ifa = next_sec; ifa;
453 		     ifa = rtnl_dereference(ifa->ifa_next)) {
454 			if (ifa1->ifa_mask != ifa->ifa_mask ||
455 			    !inet_ifa_match(ifa1->ifa_address, ifa))
456 					continue;
457 			fib_add_ifaddr(ifa);
458 		}
459 
460 	}
461 	if (destroy)
462 		inet_free_ifa(ifa1);
463 }
464 
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
/* Worker that enforces address lifetimes; defined further down. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6 only flags to IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513 				inet_free_ifa(ifa);
514 				return -EINVAL;
515 			}
516 			ifa->ifa_flags |= IFA_F_SECONDARY;
517 		}
518 
519 		ifap = &ifa1->ifa_next;
520 		ifa1 = rtnl_dereference(*ifap);
521 	}
522 
523 	/* Allow any devices that wish to register ifaddr validtors to weigh
524 	 * in now, before changes are committed.  The rntl lock is serializing
525 	 * access here, so the state should not change between a validator call
526 	 * and a final notify on commit.  This isn't invoked on promotion under
527 	 * the assumption that validators are checking the address itself, and
528 	 * not the flags.
529 	 */
530 	ivi.ivi_addr = ifa->ifa_address;
531 	ivi.ivi_dev = ifa->ifa_dev;
532 	ivi.extack = extack;
533 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534 					   NETDEV_UP, &ivi);
535 	ret = notifier_to_errno(ret);
536 	if (ret) {
537 		inet_free_ifa(ifa);
538 		return ret;
539 	}
540 
541 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542 		ifap = last_primary;
543 
544 	rcu_assign_pointer(ifa->ifa_next, *ifap);
545 	rcu_assign_pointer(*ifap, ifa);
546 
547 	inet_hash_insert(dev_net(in_dev->dev), ifa);
548 
549 	cancel_delayed_work(&check_lifetime_work);
550 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551 
552 	/* Send message first, then call notifier.
553 	   Notifier will trigger FIB update, so that
554 	   listeners of netlink will know about new ifaddr */
555 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557 
558 	return 0;
559 }
560 
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565 
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	if (!in_dev) {
573 		inet_free_ifa(ifa);
574 		return -ENOBUFS;
575 	}
576 	ipv4_devconf_setall(in_dev);
577 	neigh_parms_data_state_setall(in_dev->arp_parms);
578 	if (ifa->ifa_dev != in_dev) {
579 		WARN_ON(ifa->ifa_dev);
580 		in_dev_hold(in_dev);
581 		ifa->ifa_dev = in_dev;
582 	}
583 	if (ipv4_is_loopback(ifa->ifa_local))
584 		ifa->ifa_scope = RT_SCOPE_HOST;
585 	return inet_insert_ifa(ifa);
586 }
587 
588 /* Caller must hold RCU or RTNL :
589  * We dont take a reference on found in_device
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593 	struct net_device *dev;
594 	struct in_device *in_dev = NULL;
595 
596 	rcu_read_lock();
597 	dev = dev_get_by_index_rcu(net, ifindex);
598 	if (dev)
599 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600 	rcu_read_unlock();
601 	return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604 
605 /* Called only from RTNL semaphored context. No locks. */
606 
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 				    __be32 mask)
609 {
610 	struct in_ifaddr *ifa;
611 
612 	ASSERT_RTNL();
613 
614 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616 			return ifa;
617 	}
618 	return NULL;
619 }
620 
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 				 const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625 	struct ip_mreqn mreq = {
626 		.imr_multiaddr.s_addr = ifa->ifa_address,
627 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628 	};
629 	struct sock *sk = net->ipv4.mc_autojoin_sk;
630 	int ret;
631 
632 	ASSERT_RTNL();
633 
634 	lock_sock(sk);
635 	if (join)
636 		ret = ip_mc_join_group(sk, &mreq);
637 	else
638 		ret = ip_mc_leave_group(sk, &mreq);
639 	release_sock(sk);
640 
641 	return ret;
642 #else
643 	return -EOPNOTSUPP;
644 #endif
645 }
646 
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 			    struct netlink_ext_ack *extack)
649 {
650 	struct net *net = sock_net(skb->sk);
651 	struct in_ifaddr __rcu **ifap;
652 	struct nlattr *tb[IFA_MAX+1];
653 	struct in_device *in_dev;
654 	struct ifaddrmsg *ifm;
655 	struct in_ifaddr *ifa;
656 	int err;
657 
658 	ASSERT_RTNL();
659 
660 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 				     ifa_ipv4_policy, extack);
662 	if (err < 0)
663 		goto errout;
664 
665 	ifm = nlmsg_data(nlh);
666 	in_dev = inetdev_by_index(net, ifm->ifa_index);
667 	if (!in_dev) {
668 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669 		err = -ENODEV;
670 		goto errout;
671 	}
672 
673 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674 	     ifap = &ifa->ifa_next) {
675 		if (tb[IFA_LOCAL] &&
676 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677 			continue;
678 
679 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680 			continue;
681 
682 		if (tb[IFA_ADDRESS] &&
683 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685 			continue;
686 
687 		if (ipv4_is_multicast(ifa->ifa_address))
688 			ip_mc_autojoin_config(net, false, ifa);
689 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690 		return 0;
691 	}
692 
693 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694 	err = -EADDRNOTAVAIL;
695 errout:
696 	return err;
697 }
698 
699 #define INFINITY_LIFE_TIME	0xFFFFFFFF
700 
701 static void check_lifetime(struct work_struct *work)
702 {
703 	unsigned long now, next, next_sec, next_sched;
704 	struct in_ifaddr *ifa;
705 	struct hlist_node *n;
706 	int i;
707 
708 	now = jiffies;
709 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710 
711 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712 		bool change_needed = false;
713 
714 		rcu_read_lock();
715 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716 			unsigned long age;
717 
718 			if (ifa->ifa_flags & IFA_F_PERMANENT)
719 				continue;
720 
721 			/* We try to batch several events at once. */
722 			age = (now - ifa->ifa_tstamp +
723 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
724 
725 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
726 			    age >= ifa->ifa_valid_lft) {
727 				change_needed = true;
728 			} else if (ifa->ifa_preferred_lft ==
729 				   INFINITY_LIFE_TIME) {
730 				continue;
731 			} else if (age >= ifa->ifa_preferred_lft) {
732 				if (time_before(ifa->ifa_tstamp +
733 						ifa->ifa_valid_lft * HZ, next))
734 					next = ifa->ifa_tstamp +
735 					       ifa->ifa_valid_lft * HZ;
736 
737 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
738 					change_needed = true;
739 			} else if (time_before(ifa->ifa_tstamp +
740 					       ifa->ifa_preferred_lft * HZ,
741 					       next)) {
742 				next = ifa->ifa_tstamp +
743 				       ifa->ifa_preferred_lft * HZ;
744 			}
745 		}
746 		rcu_read_unlock();
747 		if (!change_needed)
748 			continue;
749 		rtnl_lock();
750 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
751 			unsigned long age;
752 
753 			if (ifa->ifa_flags & IFA_F_PERMANENT)
754 				continue;
755 
756 			/* We try to batch several events at once. */
757 			age = (now - ifa->ifa_tstamp +
758 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
759 
760 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
761 			    age >= ifa->ifa_valid_lft) {
762 				struct in_ifaddr __rcu **ifap;
763 				struct in_ifaddr *tmp;
764 
765 				ifap = &ifa->ifa_dev->ifa_list;
766 				tmp = rtnl_dereference(*ifap);
767 				while (tmp) {
768 					if (tmp == ifa) {
769 						inet_del_ifa(ifa->ifa_dev,
770 							     ifap, 1);
771 						break;
772 					}
773 					ifap = &tmp->ifa_next;
774 					tmp = rtnl_dereference(*ifap);
775 				}
776 			} else if (ifa->ifa_preferred_lft !=
777 				   INFINITY_LIFE_TIME &&
778 				   age >= ifa->ifa_preferred_lft &&
779 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
780 				ifa->ifa_flags |= IFA_F_DEPRECATED;
781 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
782 			}
783 		}
784 		rtnl_unlock();
785 	}
786 
787 	next_sec = round_jiffies_up(next);
788 	next_sched = next;
789 
790 	/* If rounded timeout is accurate enough, accept it. */
791 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
792 		next_sched = next_sec;
793 
794 	now = jiffies;
795 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
796 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
797 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
798 
799 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
800 			next_sched - now);
801 }
802 
803 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
804 			     __u32 prefered_lft)
805 {
806 	unsigned long timeout;
807 
808 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
809 
810 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
811 	if (addrconf_finite_timeout(timeout))
812 		ifa->ifa_valid_lft = timeout;
813 	else
814 		ifa->ifa_flags |= IFA_F_PERMANENT;
815 
816 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
817 	if (addrconf_finite_timeout(timeout)) {
818 		if (timeout == 0)
819 			ifa->ifa_flags |= IFA_F_DEPRECATED;
820 		ifa->ifa_preferred_lft = timeout;
821 	}
822 	ifa->ifa_tstamp = jiffies;
823 	if (!ifa->ifa_cstamp)
824 		ifa->ifa_cstamp = ifa->ifa_tstamp;
825 }
826 
827 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
828 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
829 				       struct netlink_ext_ack *extack)
830 {
831 	struct nlattr *tb[IFA_MAX+1];
832 	struct in_ifaddr *ifa;
833 	struct ifaddrmsg *ifm;
834 	struct net_device *dev;
835 	struct in_device *in_dev;
836 	int err;
837 
838 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
839 				     ifa_ipv4_policy, extack);
840 	if (err < 0)
841 		goto errout;
842 
843 	ifm = nlmsg_data(nlh);
844 	err = -EINVAL;
845 
846 	if (ifm->ifa_prefixlen > 32) {
847 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
848 		goto errout;
849 	}
850 
851 	if (!tb[IFA_LOCAL]) {
852 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
853 		goto errout;
854 	}
855 
856 	dev = __dev_get_by_index(net, ifm->ifa_index);
857 	err = -ENODEV;
858 	if (!dev) {
859 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
860 		goto errout;
861 	}
862 
863 	in_dev = __in_dev_get_rtnl(dev);
864 	err = -ENOBUFS;
865 	if (!in_dev)
866 		goto errout;
867 
868 	ifa = inet_alloc_ifa();
869 	if (!ifa)
870 		/*
871 		 * A potential indev allocation can be left alive, it stays
872 		 * assigned to its device and is destroy with it.
873 		 */
874 		goto errout;
875 
876 	ipv4_devconf_setall(in_dev);
877 	neigh_parms_data_state_setall(in_dev->arp_parms);
878 	in_dev_hold(in_dev);
879 
880 	if (!tb[IFA_ADDRESS])
881 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
882 
883 	INIT_HLIST_NODE(&ifa->hash);
884 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
885 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
886 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
887 					 ifm->ifa_flags;
888 	ifa->ifa_scope = ifm->ifa_scope;
889 	ifa->ifa_dev = in_dev;
890 
891 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
892 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
893 
894 	if (tb[IFA_BROADCAST])
895 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
896 
897 	if (tb[IFA_LABEL])
898 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
899 	else
900 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
901 
902 	if (tb[IFA_RT_PRIORITY])
903 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
904 
905 	if (tb[IFA_PROTO])
906 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
907 
908 	if (tb[IFA_CACHEINFO]) {
909 		struct ifa_cacheinfo *ci;
910 
911 		ci = nla_data(tb[IFA_CACHEINFO]);
912 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
913 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
914 			err = -EINVAL;
915 			goto errout_free;
916 		}
917 		*pvalid_lft = ci->ifa_valid;
918 		*pprefered_lft = ci->ifa_prefered;
919 	}
920 
921 	return ifa;
922 
923 errout_free:
924 	inet_free_ifa(ifa);
925 errout:
926 	return ERR_PTR(err);
927 }
928 
929 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
930 {
931 	struct in_device *in_dev = ifa->ifa_dev;
932 	struct in_ifaddr *ifa1;
933 
934 	if (!ifa->ifa_local)
935 		return NULL;
936 
937 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
938 		if (ifa1->ifa_mask == ifa->ifa_mask &&
939 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
940 		    ifa1->ifa_local == ifa->ifa_local)
941 			return ifa1;
942 	}
943 	return NULL;
944 }
945 
946 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
947 			    struct netlink_ext_ack *extack)
948 {
949 	struct net *net = sock_net(skb->sk);
950 	struct in_ifaddr *ifa;
951 	struct in_ifaddr *ifa_existing;
952 	__u32 valid_lft = INFINITY_LIFE_TIME;
953 	__u32 prefered_lft = INFINITY_LIFE_TIME;
954 
955 	ASSERT_RTNL();
956 
957 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
958 	if (IS_ERR(ifa))
959 		return PTR_ERR(ifa);
960 
961 	ifa_existing = find_matching_ifa(ifa);
962 	if (!ifa_existing) {
963 		/* It would be best to check for !NLM_F_CREATE here but
964 		 * userspace already relies on not having to provide this.
965 		 */
966 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
967 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
968 			int ret = ip_mc_autojoin_config(net, true, ifa);
969 
970 			if (ret < 0) {
971 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
972 				inet_free_ifa(ifa);
973 				return ret;
974 			}
975 		}
976 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
977 					 extack);
978 	} else {
979 		u32 new_metric = ifa->ifa_rt_priority;
980 		u8 new_proto = ifa->ifa_proto;
981 
982 		inet_free_ifa(ifa);
983 
984 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
985 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
986 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
987 			return -EEXIST;
988 		}
989 		ifa = ifa_existing;
990 
991 		if (ifa->ifa_rt_priority != new_metric) {
992 			fib_modify_prefix_metric(ifa, new_metric);
993 			ifa->ifa_rt_priority = new_metric;
994 		}
995 
996 		ifa->ifa_proto = new_proto;
997 
998 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
999 		cancel_delayed_work(&check_lifetime_work);
1000 		queue_delayed_work(system_power_efficient_wq,
1001 				&check_lifetime_work, 0);
1002 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1003 	}
1004 	return 0;
1005 }
1006 
1007 /*
1008  *	Determine a default network mask, based on the IP address.
1009  */
1010 
1011 static int inet_abc_len(__be32 addr)
1012 {
1013 	int rc = -1;	/* Something else, probably a multicast. */
1014 
1015 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1016 		rc = 0;
1017 	else {
1018 		__u32 haddr = ntohl(addr);
1019 		if (IN_CLASSA(haddr))
1020 			rc = 8;
1021 		else if (IN_CLASSB(haddr))
1022 			rc = 16;
1023 		else if (IN_CLASSC(haddr))
1024 			rc = 24;
1025 		else if (IN_CLASSE(haddr))
1026 			rc = 32;
1027 	}
1028 
1029 	return rc;
1030 }
1031 
1032 
1033 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1034 {
1035 	struct sockaddr_in sin_orig;
1036 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1037 	struct in_ifaddr __rcu **ifap = NULL;
1038 	struct in_device *in_dev;
1039 	struct in_ifaddr *ifa = NULL;
1040 	struct net_device *dev;
1041 	char *colon;
1042 	int ret = -EFAULT;
1043 	int tryaddrmatch = 0;
1044 
1045 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1046 
1047 	/* save original address for comparison */
1048 	memcpy(&sin_orig, sin, sizeof(*sin));
1049 
1050 	colon = strchr(ifr->ifr_name, ':');
1051 	if (colon)
1052 		*colon = 0;
1053 
1054 	dev_load(net, ifr->ifr_name);
1055 
1056 	switch (cmd) {
1057 	case SIOCGIFADDR:	/* Get interface address */
1058 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1059 	case SIOCGIFDSTADDR:	/* Get the destination address */
1060 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1061 		/* Note that these ioctls will not sleep,
1062 		   so that we do not impose a lock.
1063 		   One day we will be forced to put shlock here (I mean SMP)
1064 		 */
1065 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1066 		memset(sin, 0, sizeof(*sin));
1067 		sin->sin_family = AF_INET;
1068 		break;
1069 
1070 	case SIOCSIFFLAGS:
1071 		ret = -EPERM;
1072 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1073 			goto out;
1074 		break;
1075 	case SIOCSIFADDR:	/* Set interface address (and family) */
1076 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1077 	case SIOCSIFDSTADDR:	/* Set the destination address */
1078 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1079 		ret = -EPERM;
1080 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1081 			goto out;
1082 		ret = -EINVAL;
1083 		if (sin->sin_family != AF_INET)
1084 			goto out;
1085 		break;
1086 	default:
1087 		ret = -EINVAL;
1088 		goto out;
1089 	}
1090 
1091 	rtnl_lock();
1092 
1093 	ret = -ENODEV;
1094 	dev = __dev_get_by_name(net, ifr->ifr_name);
1095 	if (!dev)
1096 		goto done;
1097 
1098 	if (colon)
1099 		*colon = ':';
1100 
1101 	in_dev = __in_dev_get_rtnl(dev);
1102 	if (in_dev) {
1103 		if (tryaddrmatch) {
1104 			/* Matthias Andree */
1105 			/* compare label and address (4.4BSD style) */
1106 			/* note: we only do this for a limited set of ioctls
1107 			   and only if the original address family was AF_INET.
1108 			   This is checked above. */
1109 
1110 			for (ifap = &in_dev->ifa_list;
1111 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1112 			     ifap = &ifa->ifa_next) {
1113 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1114 				    sin_orig.sin_addr.s_addr ==
1115 							ifa->ifa_local) {
1116 					break; /* found */
1117 				}
1118 			}
1119 		}
1120 		/* we didn't get a match, maybe the application is
1121 		   4.3BSD-style and passed in junk so we fall back to
1122 		   comparing just the label */
1123 		if (!ifa) {
1124 			for (ifap = &in_dev->ifa_list;
1125 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1126 			     ifap = &ifa->ifa_next)
1127 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1128 					break;
1129 		}
1130 	}
1131 
1132 	ret = -EADDRNOTAVAIL;
1133 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1134 		goto done;
1135 
1136 	switch (cmd) {
1137 	case SIOCGIFADDR:	/* Get interface address */
1138 		ret = 0;
1139 		sin->sin_addr.s_addr = ifa->ifa_local;
1140 		break;
1141 
1142 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1143 		ret = 0;
1144 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1145 		break;
1146 
1147 	case SIOCGIFDSTADDR:	/* Get the destination address */
1148 		ret = 0;
1149 		sin->sin_addr.s_addr = ifa->ifa_address;
1150 		break;
1151 
1152 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1153 		ret = 0;
1154 		sin->sin_addr.s_addr = ifa->ifa_mask;
1155 		break;
1156 
1157 	case SIOCSIFFLAGS:
1158 		if (colon) {
1159 			ret = -EADDRNOTAVAIL;
1160 			if (!ifa)
1161 				break;
1162 			ret = 0;
1163 			if (!(ifr->ifr_flags & IFF_UP))
1164 				inet_del_ifa(in_dev, ifap, 1);
1165 			break;
1166 		}
1167 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1168 		break;
1169 
1170 	case SIOCSIFADDR:	/* Set interface address (and family) */
1171 		ret = -EINVAL;
1172 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1173 			break;
1174 
1175 		if (!ifa) {
1176 			ret = -ENOBUFS;
1177 			ifa = inet_alloc_ifa();
1178 			if (!ifa)
1179 				break;
1180 			INIT_HLIST_NODE(&ifa->hash);
1181 			if (colon)
1182 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1183 			else
1184 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1185 		} else {
1186 			ret = 0;
1187 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1188 				break;
1189 			inet_del_ifa(in_dev, ifap, 0);
1190 			ifa->ifa_broadcast = 0;
1191 			ifa->ifa_scope = 0;
1192 		}
1193 
1194 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1195 
1196 		if (!(dev->flags & IFF_POINTOPOINT)) {
1197 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1198 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1199 			if ((dev->flags & IFF_BROADCAST) &&
1200 			    ifa->ifa_prefixlen < 31)
1201 				ifa->ifa_broadcast = ifa->ifa_address |
1202 						     ~ifa->ifa_mask;
1203 		} else {
1204 			ifa->ifa_prefixlen = 32;
1205 			ifa->ifa_mask = inet_make_mask(32);
1206 		}
1207 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1208 		ret = inet_set_ifa(dev, ifa);
1209 		break;
1210 
1211 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1212 		ret = 0;
1213 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1214 			inet_del_ifa(in_dev, ifap, 0);
1215 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1216 			inet_insert_ifa(ifa);
1217 		}
1218 		break;
1219 
1220 	case SIOCSIFDSTADDR:	/* Set the destination address */
1221 		ret = 0;
1222 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1223 			break;
1224 		ret = -EINVAL;
1225 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1226 			break;
1227 		ret = 0;
1228 		inet_del_ifa(in_dev, ifap, 0);
1229 		ifa->ifa_address = sin->sin_addr.s_addr;
1230 		inet_insert_ifa(ifa);
1231 		break;
1232 
1233 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1234 
1235 		/*
1236 		 *	The mask we set must be legal.
1237 		 */
1238 		ret = -EINVAL;
1239 		if (bad_mask(sin->sin_addr.s_addr, 0))
1240 			break;
1241 		ret = 0;
1242 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1243 			__be32 old_mask = ifa->ifa_mask;
1244 			inet_del_ifa(in_dev, ifap, 0);
1245 			ifa->ifa_mask = sin->sin_addr.s_addr;
1246 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1247 
1248 			/* See if current broadcast address matches
1249 			 * with current netmask, then recalculate
1250 			 * the broadcast address. Otherwise it's a
1251 			 * funny address, so don't touch it since
1252 			 * the user seems to know what (s)he's doing...
1253 			 */
1254 			if ((dev->flags & IFF_BROADCAST) &&
1255 			    (ifa->ifa_prefixlen < 31) &&
1256 			    (ifa->ifa_broadcast ==
1257 			     (ifa->ifa_local|~old_mask))) {
1258 				ifa->ifa_broadcast = (ifa->ifa_local |
1259 						      ~sin->sin_addr.s_addr);
1260 			}
1261 			inet_insert_ifa(ifa);
1262 		}
1263 		break;
1264 	}
1265 done:
1266 	rtnl_unlock();
1267 out:
1268 	return ret;
1269 }
1270 
1271 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1272 {
1273 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1274 	const struct in_ifaddr *ifa;
1275 	struct ifreq ifr;
1276 	int done = 0;
1277 
1278 	if (WARN_ON(size > sizeof(struct ifreq)))
1279 		goto out;
1280 
1281 	if (!in_dev)
1282 		goto out;
1283 
1284 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1285 		if (!buf) {
1286 			done += size;
1287 			continue;
1288 		}
1289 		if (len < size)
1290 			break;
1291 		memset(&ifr, 0, sizeof(struct ifreq));
1292 		strcpy(ifr.ifr_name, ifa->ifa_label);
1293 
1294 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1295 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1296 								ifa->ifa_local;
1297 
1298 		if (copy_to_user(buf + done, &ifr, size)) {
1299 			done = -EFAULT;
1300 			break;
1301 		}
1302 		len  -= size;
1303 		done += size;
1304 	}
1305 out:
1306 	return done;
1307 }
1308 
1309 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1310 				 int scope)
1311 {
1312 	const struct in_ifaddr *ifa;
1313 
1314 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1315 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1316 			continue;
1317 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1318 		    ifa->ifa_scope <= scope)
1319 			return ifa->ifa_local;
1320 	}
1321 
1322 	return 0;
1323 }
1324 
1325 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1326 {
1327 	const struct in_ifaddr *ifa;
1328 	__be32 addr = 0;
1329 	unsigned char localnet_scope = RT_SCOPE_HOST;
1330 	struct in_device *in_dev;
1331 	struct net *net = dev_net(dev);
1332 	int master_idx;
1333 
1334 	rcu_read_lock();
1335 	in_dev = __in_dev_get_rcu(dev);
1336 	if (!in_dev)
1337 		goto no_in_dev;
1338 
1339 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1340 		localnet_scope = RT_SCOPE_LINK;
1341 
1342 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1343 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1344 			continue;
1345 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1346 			continue;
1347 		if (!dst || inet_ifa_match(dst, ifa)) {
1348 			addr = ifa->ifa_local;
1349 			break;
1350 		}
1351 		if (!addr)
1352 			addr = ifa->ifa_local;
1353 	}
1354 
1355 	if (addr)
1356 		goto out_unlock;
1357 no_in_dev:
1358 	master_idx = l3mdev_master_ifindex_rcu(dev);
1359 
1360 	/* For VRFs, the VRF device takes the place of the loopback device,
1361 	 * with addresses on it being preferred.  Note in such cases the
1362 	 * loopback device will be among the devices that fail the master_idx
1363 	 * equality check in the loop below.
1364 	 */
1365 	if (master_idx &&
1366 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1367 	    (in_dev = __in_dev_get_rcu(dev))) {
1368 		addr = in_dev_select_addr(in_dev, scope);
1369 		if (addr)
1370 			goto out_unlock;
1371 	}
1372 
1373 	/* Not loopback addresses on loopback should be preferred
1374 	   in this case. It is important that lo is the first interface
1375 	   in dev_base list.
1376 	 */
1377 	for_each_netdev_rcu(net, dev) {
1378 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1379 			continue;
1380 
1381 		in_dev = __in_dev_get_rcu(dev);
1382 		if (!in_dev)
1383 			continue;
1384 
1385 		addr = in_dev_select_addr(in_dev, scope);
1386 		if (addr)
1387 			goto out_unlock;
1388 	}
1389 out_unlock:
1390 	rcu_read_unlock();
1391 	return addr;
1392 }
1393 EXPORT_SYMBOL(inet_select_addr);
1394 
1395 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1396 			      __be32 local, int scope)
1397 {
1398 	unsigned char localnet_scope = RT_SCOPE_HOST;
1399 	const struct in_ifaddr *ifa;
1400 	__be32 addr = 0;
1401 	int same = 0;
1402 
1403 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1404 		localnet_scope = RT_SCOPE_LINK;
1405 
1406 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1407 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1408 
1409 		if (!addr &&
1410 		    (local == ifa->ifa_local || !local) &&
1411 		    min_scope <= scope) {
1412 			addr = ifa->ifa_local;
1413 			if (same)
1414 				break;
1415 		}
1416 		if (!same) {
1417 			same = (!local || inet_ifa_match(local, ifa)) &&
1418 				(!dst || inet_ifa_match(dst, ifa));
1419 			if (same && addr) {
1420 				if (local || !dst)
1421 					break;
1422 				/* Is the selected addr into dst subnet? */
1423 				if (inet_ifa_match(addr, ifa))
1424 					break;
1425 				/* No, then can we use new local src? */
1426 				if (min_scope <= scope) {
1427 					addr = ifa->ifa_local;
1428 					break;
1429 				}
1430 				/* search for large dst subnet for addr */
1431 				same = 0;
1432 			}
1433 		}
1434 	}
1435 
1436 	return same ? addr : 0;
1437 }
1438 
1439 /*
1440  * Confirm that local IP address exists using wildcards:
1441  * - net: netns to check, cannot be NULL
1442  * - in_dev: only on this interface, NULL=any interface
1443  * - dst: only in the same subnet as dst, 0=any dst
1444  * - local: address, 0=autoselect the local address
1445  * - scope: maximum allowed scope value for the local address
1446  */
1447 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1448 			 __be32 dst, __be32 local, int scope)
1449 {
1450 	__be32 addr = 0;
1451 	struct net_device *dev;
1452 
1453 	if (in_dev)
1454 		return confirm_addr_indev(in_dev, dst, local, scope);
1455 
1456 	rcu_read_lock();
1457 	for_each_netdev_rcu(net, dev) {
1458 		in_dev = __in_dev_get_rcu(dev);
1459 		if (in_dev) {
1460 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1461 			if (addr)
1462 				break;
1463 		}
1464 	}
1465 	rcu_read_unlock();
1466 
1467 	return addr;
1468 }
1469 EXPORT_SYMBOL(inet_confirm_addr);
1470 
1471 /*
1472  *	Device notifier
1473  */
1474 
1475 int register_inetaddr_notifier(struct notifier_block *nb)
1476 {
1477 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1478 }
1479 EXPORT_SYMBOL(register_inetaddr_notifier);
1480 
1481 int unregister_inetaddr_notifier(struct notifier_block *nb)
1482 {
1483 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1484 }
1485 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1486 
1487 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1488 {
1489 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1490 }
1491 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1492 
1493 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1494 {
1495 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1496 	    nb);
1497 }
1498 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1499 
1500 /* Rename ifa_labels for a device name change. Make some effort to preserve
1501  * existing alias numbering and to create unique labels if possible.
1502 */
1503 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1504 {
1505 	struct in_ifaddr *ifa;
1506 	int named = 0;
1507 
1508 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1509 		char old[IFNAMSIZ], *dot;
1510 
1511 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1512 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1513 		if (named++ == 0)
1514 			goto skip;
1515 		dot = strchr(old, ':');
1516 		if (!dot) {
1517 			sprintf(old, ":%d", named);
1518 			dot = old;
1519 		}
1520 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1521 			strcat(ifa->ifa_label, dot);
1522 		else
1523 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1524 skip:
1525 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1526 	}
1527 }
1528 
1529 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1530 					struct in_device *in_dev)
1531 
1532 {
1533 	const struct in_ifaddr *ifa;
1534 
1535 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1536 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1537 			 ifa->ifa_local, dev,
1538 			 ifa->ifa_local, NULL,
1539 			 dev->dev_addr, NULL);
1540 	}
1541 }
1542 
1543 /* Called only under RTNL semaphore */
1544 
1545 static int inetdev_event(struct notifier_block *this, unsigned long event,
1546 			 void *ptr)
1547 {
1548 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1549 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1550 
1551 	ASSERT_RTNL();
1552 
1553 	if (!in_dev) {
1554 		if (event == NETDEV_REGISTER) {
1555 			in_dev = inetdev_init(dev);
1556 			if (IS_ERR(in_dev))
1557 				return notifier_from_errno(PTR_ERR(in_dev));
1558 			if (dev->flags & IFF_LOOPBACK) {
1559 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1560 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1561 			}
1562 		} else if (event == NETDEV_CHANGEMTU) {
1563 			/* Re-enabling IP */
1564 			if (inetdev_valid_mtu(dev->mtu))
1565 				in_dev = inetdev_init(dev);
1566 		}
1567 		goto out;
1568 	}
1569 
1570 	switch (event) {
1571 	case NETDEV_REGISTER:
1572 		pr_debug("%s: bug\n", __func__);
1573 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1574 		break;
1575 	case NETDEV_UP:
1576 		if (!inetdev_valid_mtu(dev->mtu))
1577 			break;
1578 		if (dev->flags & IFF_LOOPBACK) {
1579 			struct in_ifaddr *ifa = inet_alloc_ifa();
1580 
1581 			if (ifa) {
1582 				INIT_HLIST_NODE(&ifa->hash);
1583 				ifa->ifa_local =
1584 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1585 				ifa->ifa_prefixlen = 8;
1586 				ifa->ifa_mask = inet_make_mask(8);
1587 				in_dev_hold(in_dev);
1588 				ifa->ifa_dev = in_dev;
1589 				ifa->ifa_scope = RT_SCOPE_HOST;
1590 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1591 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1592 						 INFINITY_LIFE_TIME);
1593 				ipv4_devconf_setall(in_dev);
1594 				neigh_parms_data_state_setall(in_dev->arp_parms);
1595 				inet_insert_ifa(ifa);
1596 			}
1597 		}
1598 		ip_mc_up(in_dev);
1599 		fallthrough;
1600 	case NETDEV_CHANGEADDR:
1601 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1602 			break;
1603 		fallthrough;
1604 	case NETDEV_NOTIFY_PEERS:
1605 		/* Send gratuitous ARP to notify of link change */
1606 		inetdev_send_gratuitous_arp(dev, in_dev);
1607 		break;
1608 	case NETDEV_DOWN:
1609 		ip_mc_down(in_dev);
1610 		break;
1611 	case NETDEV_PRE_TYPE_CHANGE:
1612 		ip_mc_unmap(in_dev);
1613 		break;
1614 	case NETDEV_POST_TYPE_CHANGE:
1615 		ip_mc_remap(in_dev);
1616 		break;
1617 	case NETDEV_CHANGEMTU:
1618 		if (inetdev_valid_mtu(dev->mtu))
1619 			break;
1620 		/* disable IP when MTU is not enough */
1621 		fallthrough;
1622 	case NETDEV_UNREGISTER:
1623 		inetdev_destroy(in_dev);
1624 		break;
1625 	case NETDEV_CHANGENAME:
1626 		/* Do not notify about label change, this event is
1627 		 * not interesting to applications using netlink.
1628 		 */
1629 		inetdev_changename(dev, in_dev);
1630 
1631 		devinet_sysctl_unregister(in_dev);
1632 		devinet_sysctl_register(in_dev);
1633 		break;
1634 	}
1635 out:
1636 	return NOTIFY_DONE;
1637 }
1638 
1639 static struct notifier_block ip_netdev_notifier = {
1640 	.notifier_call = inetdev_event,
1641 };
1642 
1643 static size_t inet_nlmsg_size(void)
1644 {
1645 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1646 	       + nla_total_size(4) /* IFA_ADDRESS */
1647 	       + nla_total_size(4) /* IFA_LOCAL */
1648 	       + nla_total_size(4) /* IFA_BROADCAST */
1649 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1650 	       + nla_total_size(4)  /* IFA_FLAGS */
1651 	       + nla_total_size(1)  /* IFA_PROTO */
1652 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1653 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1654 }
1655 
1656 static inline u32 cstamp_delta(unsigned long cstamp)
1657 {
1658 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1659 }
1660 
1661 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1662 			 unsigned long tstamp, u32 preferred, u32 valid)
1663 {
1664 	struct ifa_cacheinfo ci;
1665 
1666 	ci.cstamp = cstamp_delta(cstamp);
1667 	ci.tstamp = cstamp_delta(tstamp);
1668 	ci.ifa_prefered = preferred;
1669 	ci.ifa_valid = valid;
1670 
1671 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1672 }
1673 
1674 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1675 			    struct inet_fill_args *args)
1676 {
1677 	struct ifaddrmsg *ifm;
1678 	struct nlmsghdr  *nlh;
1679 	u32 preferred, valid;
1680 
1681 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1682 			args->flags);
1683 	if (!nlh)
1684 		return -EMSGSIZE;
1685 
1686 	ifm = nlmsg_data(nlh);
1687 	ifm->ifa_family = AF_INET;
1688 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1689 	ifm->ifa_flags = ifa->ifa_flags;
1690 	ifm->ifa_scope = ifa->ifa_scope;
1691 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1692 
1693 	if (args->netnsid >= 0 &&
1694 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1695 		goto nla_put_failure;
1696 
1697 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1698 		preferred = ifa->ifa_preferred_lft;
1699 		valid = ifa->ifa_valid_lft;
1700 		if (preferred != INFINITY_LIFE_TIME) {
1701 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1702 
1703 			if (preferred > tval)
1704 				preferred -= tval;
1705 			else
1706 				preferred = 0;
1707 			if (valid != INFINITY_LIFE_TIME) {
1708 				if (valid > tval)
1709 					valid -= tval;
1710 				else
1711 					valid = 0;
1712 			}
1713 		}
1714 	} else {
1715 		preferred = INFINITY_LIFE_TIME;
1716 		valid = INFINITY_LIFE_TIME;
1717 	}
1718 	if ((ifa->ifa_address &&
1719 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1720 	    (ifa->ifa_local &&
1721 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1722 	    (ifa->ifa_broadcast &&
1723 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1724 	    (ifa->ifa_label[0] &&
1725 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1726 	    (ifa->ifa_proto &&
1727 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1728 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1729 	    (ifa->ifa_rt_priority &&
1730 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1731 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1732 			  preferred, valid))
1733 		goto nla_put_failure;
1734 
1735 	nlmsg_end(skb, nlh);
1736 	return 0;
1737 
1738 nla_put_failure:
1739 	nlmsg_cancel(skb, nlh);
1740 	return -EMSGSIZE;
1741 }
1742 
1743 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1744 				      struct inet_fill_args *fillargs,
1745 				      struct net **tgt_net, struct sock *sk,
1746 				      struct netlink_callback *cb)
1747 {
1748 	struct netlink_ext_ack *extack = cb->extack;
1749 	struct nlattr *tb[IFA_MAX+1];
1750 	struct ifaddrmsg *ifm;
1751 	int err, i;
1752 
1753 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1754 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1755 		return -EINVAL;
1756 	}
1757 
1758 	ifm = nlmsg_data(nlh);
1759 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1760 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1761 		return -EINVAL;
1762 	}
1763 
1764 	fillargs->ifindex = ifm->ifa_index;
1765 	if (fillargs->ifindex) {
1766 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1767 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1768 	}
1769 
1770 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1771 					    ifa_ipv4_policy, extack);
1772 	if (err < 0)
1773 		return err;
1774 
1775 	for (i = 0; i <= IFA_MAX; ++i) {
1776 		if (!tb[i])
1777 			continue;
1778 
1779 		if (i == IFA_TARGET_NETNSID) {
1780 			struct net *net;
1781 
1782 			fillargs->netnsid = nla_get_s32(tb[i]);
1783 
1784 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1785 			if (IS_ERR(net)) {
1786 				fillargs->netnsid = -1;
1787 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1788 				return PTR_ERR(net);
1789 			}
1790 			*tgt_net = net;
1791 		} else {
1792 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1793 			return -EINVAL;
1794 		}
1795 	}
1796 
1797 	return 0;
1798 }
1799 
1800 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1801 			    struct netlink_callback *cb, int s_ip_idx,
1802 			    struct inet_fill_args *fillargs)
1803 {
1804 	struct in_ifaddr *ifa;
1805 	int ip_idx = 0;
1806 	int err;
1807 
1808 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1809 		if (ip_idx < s_ip_idx) {
1810 			ip_idx++;
1811 			continue;
1812 		}
1813 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1814 		if (err < 0)
1815 			goto done;
1816 
1817 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1818 		ip_idx++;
1819 	}
1820 	err = 0;
1821 
1822 done:
1823 	cb->args[2] = ip_idx;
1824 
1825 	return err;
1826 }
1827 
1828 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1829 {
1830 	const struct nlmsghdr *nlh = cb->nlh;
1831 	struct inet_fill_args fillargs = {
1832 		.portid = NETLINK_CB(cb->skb).portid,
1833 		.seq = nlh->nlmsg_seq,
1834 		.event = RTM_NEWADDR,
1835 		.flags = NLM_F_MULTI,
1836 		.netnsid = -1,
1837 	};
1838 	struct net *net = sock_net(skb->sk);
1839 	struct net *tgt_net = net;
1840 	int h, s_h;
1841 	int idx, s_idx;
1842 	int s_ip_idx;
1843 	struct net_device *dev;
1844 	struct in_device *in_dev;
1845 	struct hlist_head *head;
1846 	int err = 0;
1847 
1848 	s_h = cb->args[0];
1849 	s_idx = idx = cb->args[1];
1850 	s_ip_idx = cb->args[2];
1851 
1852 	if (cb->strict_check) {
1853 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1854 						 skb->sk, cb);
1855 		if (err < 0)
1856 			goto put_tgt_net;
1857 
1858 		err = 0;
1859 		if (fillargs.ifindex) {
1860 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1861 			if (!dev) {
1862 				err = -ENODEV;
1863 				goto put_tgt_net;
1864 			}
1865 
1866 			in_dev = __in_dev_get_rtnl(dev);
1867 			if (in_dev) {
1868 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1869 						       &fillargs);
1870 			}
1871 			goto put_tgt_net;
1872 		}
1873 	}
1874 
1875 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1876 		idx = 0;
1877 		head = &tgt_net->dev_index_head[h];
1878 		rcu_read_lock();
1879 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1880 			  tgt_net->dev_base_seq;
1881 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1882 			if (idx < s_idx)
1883 				goto cont;
1884 			if (h > s_h || idx > s_idx)
1885 				s_ip_idx = 0;
1886 			in_dev = __in_dev_get_rcu(dev);
1887 			if (!in_dev)
1888 				goto cont;
1889 
1890 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1891 					       &fillargs);
1892 			if (err < 0) {
1893 				rcu_read_unlock();
1894 				goto done;
1895 			}
1896 cont:
1897 			idx++;
1898 		}
1899 		rcu_read_unlock();
1900 	}
1901 
1902 done:
1903 	cb->args[0] = h;
1904 	cb->args[1] = idx;
1905 put_tgt_net:
1906 	if (fillargs.netnsid >= 0)
1907 		put_net(tgt_net);
1908 
1909 	return skb->len ? : err;
1910 }
1911 
1912 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1913 		      u32 portid)
1914 {
1915 	struct inet_fill_args fillargs = {
1916 		.portid = portid,
1917 		.seq = nlh ? nlh->nlmsg_seq : 0,
1918 		.event = event,
1919 		.flags = 0,
1920 		.netnsid = -1,
1921 	};
1922 	struct sk_buff *skb;
1923 	int err = -ENOBUFS;
1924 	struct net *net;
1925 
1926 	net = dev_net(ifa->ifa_dev->dev);
1927 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1928 	if (!skb)
1929 		goto errout;
1930 
1931 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1932 	if (err < 0) {
1933 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1934 		WARN_ON(err == -EMSGSIZE);
1935 		kfree_skb(skb);
1936 		goto errout;
1937 	}
1938 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1939 	return;
1940 errout:
1941 	if (err < 0)
1942 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1943 }
1944 
1945 static size_t inet_get_link_af_size(const struct net_device *dev,
1946 				    u32 ext_filter_mask)
1947 {
1948 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1949 
1950 	if (!in_dev)
1951 		return 0;
1952 
1953 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1954 }
1955 
1956 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1957 			     u32 ext_filter_mask)
1958 {
1959 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1960 	struct nlattr *nla;
1961 	int i;
1962 
1963 	if (!in_dev)
1964 		return -ENODATA;
1965 
1966 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1967 	if (!nla)
1968 		return -EMSGSIZE;
1969 
1970 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1971 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1972 
1973 	return 0;
1974 }
1975 
1976 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1977 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1978 };
1979 
1980 static int inet_validate_link_af(const struct net_device *dev,
1981 				 const struct nlattr *nla,
1982 				 struct netlink_ext_ack *extack)
1983 {
1984 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1985 	int err, rem;
1986 
1987 	if (dev && !__in_dev_get_rtnl(dev))
1988 		return -EAFNOSUPPORT;
1989 
1990 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1991 					  inet_af_policy, extack);
1992 	if (err < 0)
1993 		return err;
1994 
1995 	if (tb[IFLA_INET_CONF]) {
1996 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1997 			int cfgid = nla_type(a);
1998 
1999 			if (nla_len(a) < 4)
2000 				return -EINVAL;
2001 
2002 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2003 				return -EINVAL;
2004 		}
2005 	}
2006 
2007 	return 0;
2008 }
2009 
2010 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2011 			    struct netlink_ext_ack *extack)
2012 {
2013 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2014 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2015 	int rem;
2016 
2017 	if (!in_dev)
2018 		return -EAFNOSUPPORT;
2019 
2020 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2021 		return -EINVAL;
2022 
2023 	if (tb[IFLA_INET_CONF]) {
2024 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2025 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2026 	}
2027 
2028 	return 0;
2029 }
2030 
2031 static int inet_netconf_msgsize_devconf(int type)
2032 {
2033 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2034 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2035 	bool all = false;
2036 
2037 	if (type == NETCONFA_ALL)
2038 		all = true;
2039 
2040 	if (all || type == NETCONFA_FORWARDING)
2041 		size += nla_total_size(4);
2042 	if (all || type == NETCONFA_RP_FILTER)
2043 		size += nla_total_size(4);
2044 	if (all || type == NETCONFA_MC_FORWARDING)
2045 		size += nla_total_size(4);
2046 	if (all || type == NETCONFA_BC_FORWARDING)
2047 		size += nla_total_size(4);
2048 	if (all || type == NETCONFA_PROXY_NEIGH)
2049 		size += nla_total_size(4);
2050 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2051 		size += nla_total_size(4);
2052 
2053 	return size;
2054 }
2055 
2056 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2057 				     struct ipv4_devconf *devconf, u32 portid,
2058 				     u32 seq, int event, unsigned int flags,
2059 				     int type)
2060 {
2061 	struct nlmsghdr  *nlh;
2062 	struct netconfmsg *ncm;
2063 	bool all = false;
2064 
2065 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2066 			flags);
2067 	if (!nlh)
2068 		return -EMSGSIZE;
2069 
2070 	if (type == NETCONFA_ALL)
2071 		all = true;
2072 
2073 	ncm = nlmsg_data(nlh);
2074 	ncm->ncm_family = AF_INET;
2075 
2076 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2077 		goto nla_put_failure;
2078 
2079 	if (!devconf)
2080 		goto out;
2081 
2082 	if ((all || type == NETCONFA_FORWARDING) &&
2083 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2084 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2085 		goto nla_put_failure;
2086 	if ((all || type == NETCONFA_RP_FILTER) &&
2087 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2088 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2089 		goto nla_put_failure;
2090 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2091 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2092 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2093 		goto nla_put_failure;
2094 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2095 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2096 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2097 		goto nla_put_failure;
2098 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2099 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2100 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2101 		goto nla_put_failure;
2102 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2103 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2104 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2105 		goto nla_put_failure;
2106 
2107 out:
2108 	nlmsg_end(skb, nlh);
2109 	return 0;
2110 
2111 nla_put_failure:
2112 	nlmsg_cancel(skb, nlh);
2113 	return -EMSGSIZE;
2114 }
2115 
2116 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2117 				 int ifindex, struct ipv4_devconf *devconf)
2118 {
2119 	struct sk_buff *skb;
2120 	int err = -ENOBUFS;
2121 
2122 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2123 	if (!skb)
2124 		goto errout;
2125 
2126 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2127 					event, 0, type);
2128 	if (err < 0) {
2129 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2130 		WARN_ON(err == -EMSGSIZE);
2131 		kfree_skb(skb);
2132 		goto errout;
2133 	}
2134 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2135 	return;
2136 errout:
2137 	if (err < 0)
2138 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2139 }
2140 
2141 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2142 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2143 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2144 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2145 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2146 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2147 };
2148 
2149 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2150 				      const struct nlmsghdr *nlh,
2151 				      struct nlattr **tb,
2152 				      struct netlink_ext_ack *extack)
2153 {
2154 	int i, err;
2155 
2156 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2157 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2158 		return -EINVAL;
2159 	}
2160 
2161 	if (!netlink_strict_get_check(skb))
2162 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2163 					      tb, NETCONFA_MAX,
2164 					      devconf_ipv4_policy, extack);
2165 
2166 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2167 					    tb, NETCONFA_MAX,
2168 					    devconf_ipv4_policy, extack);
2169 	if (err)
2170 		return err;
2171 
2172 	for (i = 0; i <= NETCONFA_MAX; i++) {
2173 		if (!tb[i])
2174 			continue;
2175 
2176 		switch (i) {
2177 		case NETCONFA_IFINDEX:
2178 			break;
2179 		default:
2180 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2181 			return -EINVAL;
2182 		}
2183 	}
2184 
2185 	return 0;
2186 }
2187 
2188 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2189 				    struct nlmsghdr *nlh,
2190 				    struct netlink_ext_ack *extack)
2191 {
2192 	struct net *net = sock_net(in_skb->sk);
2193 	struct nlattr *tb[NETCONFA_MAX+1];
2194 	struct sk_buff *skb;
2195 	struct ipv4_devconf *devconf;
2196 	struct in_device *in_dev;
2197 	struct net_device *dev;
2198 	int ifindex;
2199 	int err;
2200 
2201 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2202 	if (err)
2203 		goto errout;
2204 
2205 	err = -EINVAL;
2206 	if (!tb[NETCONFA_IFINDEX])
2207 		goto errout;
2208 
2209 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2210 	switch (ifindex) {
2211 	case NETCONFA_IFINDEX_ALL:
2212 		devconf = net->ipv4.devconf_all;
2213 		break;
2214 	case NETCONFA_IFINDEX_DEFAULT:
2215 		devconf = net->ipv4.devconf_dflt;
2216 		break;
2217 	default:
2218 		dev = __dev_get_by_index(net, ifindex);
2219 		if (!dev)
2220 			goto errout;
2221 		in_dev = __in_dev_get_rtnl(dev);
2222 		if (!in_dev)
2223 			goto errout;
2224 		devconf = &in_dev->cnf;
2225 		break;
2226 	}
2227 
2228 	err = -ENOBUFS;
2229 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2230 	if (!skb)
2231 		goto errout;
2232 
2233 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2234 					NETLINK_CB(in_skb).portid,
2235 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2236 					NETCONFA_ALL);
2237 	if (err < 0) {
2238 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2239 		WARN_ON(err == -EMSGSIZE);
2240 		kfree_skb(skb);
2241 		goto errout;
2242 	}
2243 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2244 errout:
2245 	return err;
2246 }
2247 
2248 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2249 				     struct netlink_callback *cb)
2250 {
2251 	const struct nlmsghdr *nlh = cb->nlh;
2252 	struct net *net = sock_net(skb->sk);
2253 	int h, s_h;
2254 	int idx, s_idx;
2255 	struct net_device *dev;
2256 	struct in_device *in_dev;
2257 	struct hlist_head *head;
2258 
2259 	if (cb->strict_check) {
2260 		struct netlink_ext_ack *extack = cb->extack;
2261 		struct netconfmsg *ncm;
2262 
2263 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2264 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2265 			return -EINVAL;
2266 		}
2267 
2268 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2269 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2270 			return -EINVAL;
2271 		}
2272 	}
2273 
2274 	s_h = cb->args[0];
2275 	s_idx = idx = cb->args[1];
2276 
2277 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2278 		idx = 0;
2279 		head = &net->dev_index_head[h];
2280 		rcu_read_lock();
2281 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2282 			  net->dev_base_seq;
2283 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2284 			if (idx < s_idx)
2285 				goto cont;
2286 			in_dev = __in_dev_get_rcu(dev);
2287 			if (!in_dev)
2288 				goto cont;
2289 
2290 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2291 						      &in_dev->cnf,
2292 						      NETLINK_CB(cb->skb).portid,
2293 						      nlh->nlmsg_seq,
2294 						      RTM_NEWNETCONF,
2295 						      NLM_F_MULTI,
2296 						      NETCONFA_ALL) < 0) {
2297 				rcu_read_unlock();
2298 				goto done;
2299 			}
2300 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2301 cont:
2302 			idx++;
2303 		}
2304 		rcu_read_unlock();
2305 	}
2306 	if (h == NETDEV_HASHENTRIES) {
2307 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2308 					      net->ipv4.devconf_all,
2309 					      NETLINK_CB(cb->skb).portid,
2310 					      nlh->nlmsg_seq,
2311 					      RTM_NEWNETCONF, NLM_F_MULTI,
2312 					      NETCONFA_ALL) < 0)
2313 			goto done;
2314 		else
2315 			h++;
2316 	}
2317 	if (h == NETDEV_HASHENTRIES + 1) {
2318 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2319 					      net->ipv4.devconf_dflt,
2320 					      NETLINK_CB(cb->skb).portid,
2321 					      nlh->nlmsg_seq,
2322 					      RTM_NEWNETCONF, NLM_F_MULTI,
2323 					      NETCONFA_ALL) < 0)
2324 			goto done;
2325 		else
2326 			h++;
2327 	}
2328 done:
2329 	cb->args[0] = h;
2330 	cb->args[1] = idx;
2331 
2332 	return skb->len;
2333 }
2334 
2335 #ifdef CONFIG_SYSCTL
2336 
2337 static void devinet_copy_dflt_conf(struct net *net, int i)
2338 {
2339 	struct net_device *dev;
2340 
2341 	rcu_read_lock();
2342 	for_each_netdev_rcu(net, dev) {
2343 		struct in_device *in_dev;
2344 
2345 		in_dev = __in_dev_get_rcu(dev);
2346 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2347 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2348 	}
2349 	rcu_read_unlock();
2350 }
2351 
2352 /* called with RTNL locked */
2353 static void inet_forward_change(struct net *net)
2354 {
2355 	struct net_device *dev;
2356 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2357 
2358 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2359 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2360 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2361 				    NETCONFA_FORWARDING,
2362 				    NETCONFA_IFINDEX_ALL,
2363 				    net->ipv4.devconf_all);
2364 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2365 				    NETCONFA_FORWARDING,
2366 				    NETCONFA_IFINDEX_DEFAULT,
2367 				    net->ipv4.devconf_dflt);
2368 
2369 	for_each_netdev(net, dev) {
2370 		struct in_device *in_dev;
2371 
2372 		if (on)
2373 			dev_disable_lro(dev);
2374 
2375 		in_dev = __in_dev_get_rtnl(dev);
2376 		if (in_dev) {
2377 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2378 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2379 						    NETCONFA_FORWARDING,
2380 						    dev->ifindex, &in_dev->cnf);
2381 		}
2382 	}
2383 }
2384 
2385 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2386 {
2387 	if (cnf == net->ipv4.devconf_dflt)
2388 		return NETCONFA_IFINDEX_DEFAULT;
2389 	else if (cnf == net->ipv4.devconf_all)
2390 		return NETCONFA_IFINDEX_ALL;
2391 	else {
2392 		struct in_device *idev
2393 			= container_of(cnf, struct in_device, cnf);
2394 		return idev->dev->ifindex;
2395 	}
2396 }
2397 
2398 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2399 			     void *buffer, size_t *lenp, loff_t *ppos)
2400 {
2401 	int old_value = *(int *)ctl->data;
2402 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2403 	int new_value = *(int *)ctl->data;
2404 
2405 	if (write) {
2406 		struct ipv4_devconf *cnf = ctl->extra1;
2407 		struct net *net = ctl->extra2;
2408 		int i = (int *)ctl->data - cnf->data;
2409 		int ifindex;
2410 
2411 		set_bit(i, cnf->state);
2412 
2413 		if (cnf == net->ipv4.devconf_dflt)
2414 			devinet_copy_dflt_conf(net, i);
2415 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2416 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2417 			if ((new_value == 0) && (old_value != 0))
2418 				rt_cache_flush(net);
2419 
2420 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2421 		    new_value != old_value)
2422 			rt_cache_flush(net);
2423 
2424 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2425 		    new_value != old_value) {
2426 			ifindex = devinet_conf_ifindex(net, cnf);
2427 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2428 						    NETCONFA_RP_FILTER,
2429 						    ifindex, cnf);
2430 		}
2431 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2432 		    new_value != old_value) {
2433 			ifindex = devinet_conf_ifindex(net, cnf);
2434 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2435 						    NETCONFA_PROXY_NEIGH,
2436 						    ifindex, cnf);
2437 		}
2438 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2439 		    new_value != old_value) {
2440 			ifindex = devinet_conf_ifindex(net, cnf);
2441 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2442 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2443 						    ifindex, cnf);
2444 		}
2445 	}
2446 
2447 	return ret;
2448 }
2449 
2450 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2451 				  void *buffer, size_t *lenp, loff_t *ppos)
2452 {
2453 	int *valp = ctl->data;
2454 	int val = *valp;
2455 	loff_t pos = *ppos;
2456 	struct net *net = ctl->extra2;
2457 	int ret;
2458 
2459 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2460 		return -EPERM;
2461 
2462 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2463 
2464 	if (write && *valp != val) {
2465 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2466 			if (!rtnl_trylock()) {
2467 				/* Restore the original values before restarting */
2468 				*valp = val;
2469 				*ppos = pos;
2470 				return restart_syscall();
2471 			}
2472 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2473 				inet_forward_change(net);
2474 			} else {
2475 				struct ipv4_devconf *cnf = ctl->extra1;
2476 				struct in_device *idev =
2477 					container_of(cnf, struct in_device, cnf);
2478 				if (*valp)
2479 					dev_disable_lro(idev->dev);
2480 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2481 							    NETCONFA_FORWARDING,
2482 							    idev->dev->ifindex,
2483 							    cnf);
2484 			}
2485 			rtnl_unlock();
2486 			rt_cache_flush(net);
2487 		} else
2488 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2489 						    NETCONFA_FORWARDING,
2490 						    NETCONFA_IFINDEX_DEFAULT,
2491 						    net->ipv4.devconf_dflt);
2492 	}
2493 
2494 	return ret;
2495 }
2496 
2497 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2498 				void *buffer, size_t *lenp, loff_t *ppos)
2499 {
2500 	int *valp = ctl->data;
2501 	int val = *valp;
2502 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2503 	struct net *net = ctl->extra2;
2504 
2505 	if (write && *valp != val)
2506 		rt_cache_flush(net);
2507 
2508 	return ret;
2509 }
2510 
/*
 * Initialiser for one devinet sysctl table entry: an int-sized value at
 * ipv4_devconf.data[IPV4_DEVCONF_<attr> - 1], exposed as @name with file
 * mode @mval and handled by @proc.  .extra1 points at ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= ipv4_devconf.data +			\
				  IPV4_DEVCONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2533 
2534 static struct devinet_sysctl_table {
2535 	struct ctl_table_header *sysctl_header;
2536 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2537 } devinet_sysctl = {
2538 	.devinet_vars = {
2539 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2540 					     devinet_sysctl_forward),
2541 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2542 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2543 
2544 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2545 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2546 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2547 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2548 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2549 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2550 					"accept_source_route"),
2551 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2552 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2553 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2554 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2555 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2556 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2557 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2558 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2559 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2560 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2561 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2562 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2563 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2564 					"arp_evict_nocarrier"),
2565 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2566 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2567 					"force_igmp_version"),
2568 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2569 					"igmpv2_unsolicited_report_interval"),
2570 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2571 					"igmpv3_unsolicited_report_interval"),
2572 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2573 					"ignore_routes_with_linkdown"),
2574 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2575 					"drop_gratuitous_arp"),
2576 
2577 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2578 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2579 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2580 					      "promote_secondaries"),
2581 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2582 					      "route_localnet"),
2583 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2584 					      "drop_unicast_in_l2_multicast"),
2585 	},
2586 };
2587 
2588 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2589 				     int ifindex, struct ipv4_devconf *p)
2590 {
2591 	int i;
2592 	struct devinet_sysctl_table *t;
2593 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2594 
2595 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2596 	if (!t)
2597 		goto out;
2598 
2599 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2600 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2601 		t->devinet_vars[i].extra1 = p;
2602 		t->devinet_vars[i].extra2 = net;
2603 	}
2604 
2605 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2606 
2607 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2608 	if (!t->sysctl_header)
2609 		goto free;
2610 
2611 	p->sysctl = t;
2612 
2613 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2614 				    ifindex, p);
2615 	return 0;
2616 
2617 free:
2618 	kfree(t);
2619 out:
2620 	return -ENOMEM;
2621 }
2622 
2623 static void __devinet_sysctl_unregister(struct net *net,
2624 					struct ipv4_devconf *cnf, int ifindex)
2625 {
2626 	struct devinet_sysctl_table *t = cnf->sysctl;
2627 
2628 	if (t) {
2629 		cnf->sysctl = NULL;
2630 		unregister_net_sysctl_table(t->sysctl_header);
2631 		kfree(t);
2632 	}
2633 
2634 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2635 }
2636 
2637 static int devinet_sysctl_register(struct in_device *idev)
2638 {
2639 	int err;
2640 
2641 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2642 		return -EINVAL;
2643 
2644 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2645 	if (err)
2646 		return err;
2647 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2648 					idev->dev->ifindex, &idev->cnf);
2649 	if (err)
2650 		neigh_sysctl_unregister(idev->arp_parms);
2651 	return err;
2652 }
2653 
2654 static void devinet_sysctl_unregister(struct in_device *idev)
2655 {
2656 	struct net *net = dev_net(idev->dev);
2657 
2658 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2659 	neigh_sysctl_unregister(idev->arp_parms);
2660 }
2661 
2662 static struct ctl_table ctl_forward_entry[] = {
2663 	{
2664 		.procname	= "ip_forward",
2665 		.data		= &ipv4_devconf.data[
2666 					IPV4_DEVCONF_FORWARDING - 1],
2667 		.maxlen		= sizeof(int),
2668 		.mode		= 0644,
2669 		.proc_handler	= devinet_sysctl_forward,
2670 		.extra1		= &ipv4_devconf,
2671 		.extra2		= &init_net,
2672 	},
2673 	{ },
2674 };
2675 #endif
2676 
2677 static __net_init int devinet_init_net(struct net *net)
2678 {
2679 	int err;
2680 	struct ipv4_devconf *all, *dflt;
2681 #ifdef CONFIG_SYSCTL
2682 	struct ctl_table *tbl;
2683 	struct ctl_table_header *forw_hdr;
2684 #endif
2685 
2686 	err = -ENOMEM;
2687 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2688 	if (!all)
2689 		goto err_alloc_all;
2690 
2691 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2692 	if (!dflt)
2693 		goto err_alloc_dflt;
2694 
2695 #ifdef CONFIG_SYSCTL
2696 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2697 	if (!tbl)
2698 		goto err_alloc_ctl;
2699 
2700 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2701 	tbl[0].extra1 = all;
2702 	tbl[0].extra2 = net;
2703 #endif
2704 
2705 	if (!net_eq(net, &init_net)) {
2706 		switch (net_inherit_devconf()) {
2707 		case 3:
2708 			/* copy from the current netns */
2709 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2710 			       sizeof(ipv4_devconf));
2711 			memcpy(dflt,
2712 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2713 			       sizeof(ipv4_devconf_dflt));
2714 			break;
2715 		case 0:
2716 		case 1:
2717 			/* copy from init_net */
2718 			memcpy(all, init_net.ipv4.devconf_all,
2719 			       sizeof(ipv4_devconf));
2720 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2721 			       sizeof(ipv4_devconf_dflt));
2722 			break;
2723 		case 2:
2724 			/* use compiled values */
2725 			break;
2726 		}
2727 	}
2728 
2729 #ifdef CONFIG_SYSCTL
2730 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2731 	if (err < 0)
2732 		goto err_reg_all;
2733 
2734 	err = __devinet_sysctl_register(net, "default",
2735 					NETCONFA_IFINDEX_DEFAULT, dflt);
2736 	if (err < 0)
2737 		goto err_reg_dflt;
2738 
2739 	err = -ENOMEM;
2740 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2741 					  ARRAY_SIZE(ctl_forward_entry));
2742 	if (!forw_hdr)
2743 		goto err_reg_ctl;
2744 	net->ipv4.forw_hdr = forw_hdr;
2745 #endif
2746 
2747 	net->ipv4.devconf_all = all;
2748 	net->ipv4.devconf_dflt = dflt;
2749 	return 0;
2750 
2751 #ifdef CONFIG_SYSCTL
2752 err_reg_ctl:
2753 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2754 err_reg_dflt:
2755 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2756 err_reg_all:
2757 	kfree(tbl);
2758 err_alloc_ctl:
2759 #endif
2760 	kfree(dflt);
2761 err_alloc_dflt:
2762 	kfree(all);
2763 err_alloc_all:
2764 	return err;
2765 }
2766 
2767 static __net_exit void devinet_exit_net(struct net *net)
2768 {
2769 #ifdef CONFIG_SYSCTL
2770 	struct ctl_table *tbl;
2771 
2772 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2773 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2774 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2775 				    NETCONFA_IFINDEX_DEFAULT);
2776 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2777 				    NETCONFA_IFINDEX_ALL);
2778 	kfree(tbl);
2779 #endif
2780 	kfree(net->ipv4.devconf_dflt);
2781 	kfree(net->ipv4.devconf_all);
2782 }
2783 
2784 static __net_initdata struct pernet_operations devinet_ops = {
2785 	.init = devinet_init_net,
2786 	.exit = devinet_exit_net,
2787 };
2788 
2789 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2790 	.family		  = AF_INET,
2791 	.fill_link_af	  = inet_fill_link_af,
2792 	.get_link_af_size = inet_get_link_af_size,
2793 	.validate_link_af = inet_validate_link_af,
2794 	.set_link_af	  = inet_set_link_af,
2795 };
2796 
2797 void __init devinet_init(void)
2798 {
2799 	int i;
2800 
2801 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2802 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2803 
2804 	register_pernet_subsys(&devinet_ops);
2805 	register_netdevice_notifier(&ip_netdev_notifier);
2806 
2807 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2808 
2809 	rtnl_af_register(&inet_af_ops);
2810 
2811 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2812 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2813 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2814 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2815 		      inet_netconf_dump_devconf, 0);
2816 }
2817