xref: /openbmc/linux/net/ipv4/devinet.c (revision a93fbb00)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU or RTNL.
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fall back to the FIB local table so that communication
166 		 * over loopback subnets works.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
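/* Editorial note (illustrative sketch, not part of revision a93fbb00): with
 * devref=false the caller may only use the result inside the same RCU
 * read-side section; with devref=true it owns a reference and must drop it
 * with dev_put(). The helper name my_addr_is_local() below is hypothetical.
 *
 *	static bool my_addr_is_local(struct net *net, __be32 addr)
 *	{
 *		struct net_device *dev;
 *		bool ret;
 *
 *		rcu_read_lock();
 *		dev = __ip_dev_find(net, addr, false);	(no reference taken)
 *		ret = dev != NULL;
 *		rcu_read_unlock();
 *		return ret;
 *	}
 */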
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 void in_dev_finish_destroy(struct in_device *idev)
238 {
239 	struct net_device *dev = idev->dev;
240 
241 	WARN_ON(idev->ifa_list);
242 	WARN_ON(idev->mc_list);
243 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
244 #ifdef NET_REFCNT_DEBUG
245 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
246 #endif
247 	dev_put_track(dev, &idev->dev_tracker);
248 	if (!idev->dead)
249 		pr_err("Freeing alive in_device %p\n", idev);
250 	else
251 		kfree(idev);
252 }
253 EXPORT_SYMBOL(in_dev_finish_destroy);
254 
255 static struct in_device *inetdev_init(struct net_device *dev)
256 {
257 	struct in_device *in_dev;
258 	int err = -ENOMEM;
259 
260 	ASSERT_RTNL();
261 
262 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
263 	if (!in_dev)
264 		goto out;
265 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
266 			sizeof(in_dev->cnf));
267 	in_dev->cnf.sysctl = NULL;
268 	in_dev->dev = dev;
269 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
270 	if (!in_dev->arp_parms)
271 		goto out_kfree;
272 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
273 		dev_disable_lro(dev);
274 	/* Reference in_dev->dev */
275 	dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL);
276 	/* Account for reference dev->ip_ptr (below) */
277 	refcount_set(&in_dev->refcnt, 1);
278 
279 	err = devinet_sysctl_register(in_dev);
280 	if (err) {
281 		in_dev->dead = 1;
282 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
283 		in_dev_put(in_dev);
284 		in_dev = NULL;
285 		goto out;
286 	}
287 	ip_mc_init_dev(in_dev);
288 	if (dev->flags & IFF_UP)
289 		ip_mc_up(in_dev);
290 
291 	/* we can receive as soon as ip_ptr is set -- do this last */
292 	rcu_assign_pointer(dev->ip_ptr, in_dev);
293 out:
294 	return in_dev ?: ERR_PTR(err);
295 out_kfree:
296 	kfree(in_dev);
297 	in_dev = NULL;
298 	goto out;
299 }
300 
301 static void in_dev_rcu_put(struct rcu_head *head)
302 {
303 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
304 	in_dev_put(idev);
305 }
306 
307 static void inetdev_destroy(struct in_device *in_dev)
308 {
309 	struct net_device *dev;
310 	struct in_ifaddr *ifa;
311 
312 	ASSERT_RTNL();
313 
314 	dev = in_dev->dev;
315 
316 	in_dev->dead = 1;
317 
318 	ip_mc_destroy_dev(in_dev);
319 
320 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
321 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
322 		inet_free_ifa(ifa);
323 	}
324 
325 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
326 
327 	devinet_sysctl_unregister(in_dev);
328 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
329 	arp_ifdown(dev);
330 
331 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
332 }
333 
334 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
335 {
336 	const struct in_ifaddr *ifa;
337 
338 	rcu_read_lock();
339 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
340 		if (inet_ifa_match(a, ifa)) {
341 			if (!b || inet_ifa_match(b, ifa)) {
342 				rcu_read_unlock();
343 				return 1;
344 			}
345 		}
346 	}
347 	rcu_read_unlock();
348 	return 0;
349 }
350 
351 static void __inet_del_ifa(struct in_device *in_dev,
352 			   struct in_ifaddr __rcu **ifap,
353 			   int destroy, struct nlmsghdr *nlh, u32 portid)
354 {
355 	struct in_ifaddr *promote = NULL;
356 	struct in_ifaddr *ifa, *ifa1;
357 	struct in_ifaddr *last_prim;
358 	struct in_ifaddr *prev_prom = NULL;
359 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
360 
361 	ASSERT_RTNL();
362 
363 	ifa1 = rtnl_dereference(*ifap);
364 	last_prim = rtnl_dereference(in_dev->ifa_list);
365 	if (in_dev->dead)
366 		goto no_promotions;
367 
368 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
369 	 * unless alias promotion is set.
370 	 */
371 
372 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
373 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
374 
375 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
376 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
377 			    ifa1->ifa_scope <= ifa->ifa_scope)
378 				last_prim = ifa;
379 
380 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
381 			    ifa1->ifa_mask != ifa->ifa_mask ||
382 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
383 				ifap1 = &ifa->ifa_next;
384 				prev_prom = ifa;
385 				continue;
386 			}
387 
388 			if (!do_promote) {
389 				inet_hash_remove(ifa);
390 				*ifap1 = ifa->ifa_next;
391 
392 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
393 				blocking_notifier_call_chain(&inetaddr_chain,
394 						NETDEV_DOWN, ifa);
395 				inet_free_ifa(ifa);
396 			} else {
397 				promote = ifa;
398 				break;
399 			}
400 		}
401 	}
402 
403 	/* On promotion all secondaries from the subnet change
404 	 * the primary IP; we must remove all their routes silently
405 	 * and later add them back with the new prefsrc. Do this
406 	 * while all addresses are still on the device list.
407 	 */
408 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
409 		if (ifa1->ifa_mask == ifa->ifa_mask &&
410 		    inet_ifa_match(ifa1->ifa_address, ifa))
411 			fib_del_ifaddr(ifa, ifa1);
412 	}
413 
414 no_promotions:
415 	/* 2. Unlink it */
416 
417 	*ifap = ifa1->ifa_next;
418 	inet_hash_remove(ifa1);
419 
420 	/* 3. Announce address deletion */
421 
422 	/* Send the message first, then call the notifier.
423 	   At first sight, the FIB update triggered by the notifier
424 	   will refer to an already deleted ifaddr, which could confuse
425 	   netlink listeners. It is not true: gated sees that the route
426 	   was deleted and, if it still thinks the ifaddr is valid,
427 	   it will try to restore the deleted routes... Grr.
428 	   So this order is correct.
429 	 */
430 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
431 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
432 
433 	if (promote) {
434 		struct in_ifaddr *next_sec;
435 
436 		next_sec = rtnl_dereference(promote->ifa_next);
437 		if (prev_prom) {
438 			struct in_ifaddr *last_sec;
439 
440 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
441 
442 			last_sec = rtnl_dereference(last_prim->ifa_next);
443 			rcu_assign_pointer(promote->ifa_next, last_sec);
444 			rcu_assign_pointer(last_prim->ifa_next, promote);
445 		}
446 
447 		promote->ifa_flags &= ~IFA_F_SECONDARY;
448 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
449 		blocking_notifier_call_chain(&inetaddr_chain,
450 				NETDEV_UP, promote);
451 		for (ifa = next_sec; ifa;
452 		     ifa = rtnl_dereference(ifa->ifa_next)) {
453 			if (ifa1->ifa_mask != ifa->ifa_mask ||
454 			    !inet_ifa_match(ifa1->ifa_address, ifa))
455 					continue;
456 			fib_add_ifaddr(ifa);
457 		}
458 
459 	}
460 	if (destroy)
461 		inet_free_ifa(ifa1);
462 }
463 
464 static void inet_del_ifa(struct in_device *in_dev,
465 			 struct in_ifaddr __rcu **ifap,
466 			 int destroy)
467 {
468 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
469 }
470 
471 static void check_lifetime(struct work_struct *work);
472 
473 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
474 
475 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
476 			     u32 portid, struct netlink_ext_ack *extack)
477 {
478 	struct in_ifaddr __rcu **last_primary, **ifap;
479 	struct in_device *in_dev = ifa->ifa_dev;
480 	struct in_validator_info ivi;
481 	struct in_ifaddr *ifa1;
482 	int ret;
483 
484 	ASSERT_RTNL();
485 
486 	if (!ifa->ifa_local) {
487 		inet_free_ifa(ifa);
488 		return 0;
489 	}
490 
491 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
492 	last_primary = &in_dev->ifa_list;
493 
494 	/* Don't set IPv6-only flags on IPv4 addresses */
495 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
496 
497 	ifap = &in_dev->ifa_list;
498 	ifa1 = rtnl_dereference(*ifap);
499 
500 	while (ifa1) {
501 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
502 		    ifa->ifa_scope <= ifa1->ifa_scope)
503 			last_primary = &ifa1->ifa_next;
504 		if (ifa1->ifa_mask == ifa->ifa_mask &&
505 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
506 			if (ifa1->ifa_local == ifa->ifa_local) {
507 				inet_free_ifa(ifa);
508 				return -EEXIST;
509 			}
510 			if (ifa1->ifa_scope != ifa->ifa_scope) {
511 				inet_free_ifa(ifa);
512 				return -EINVAL;
513 			}
514 			ifa->ifa_flags |= IFA_F_SECONDARY;
515 		}
516 
517 		ifap = &ifa1->ifa_next;
518 		ifa1 = rtnl_dereference(*ifap);
519 	}
520 
521 	/* Allow any devices that wish to register ifaddr validators to weigh
522 	 * in now, before changes are committed.  The rtnl lock is serializing
523 	 * access here, so the state should not change between a validator call
524 	 * and a final notify on commit.  This isn't invoked on promotion under
525 	 * the assumption that validators are checking the address itself, and
526 	 * not the flags.
527 	 */
528 	ivi.ivi_addr = ifa->ifa_address;
529 	ivi.ivi_dev = ifa->ifa_dev;
530 	ivi.extack = extack;
531 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
532 					   NETDEV_UP, &ivi);
533 	ret = notifier_to_errno(ret);
534 	if (ret) {
535 		inet_free_ifa(ifa);
536 		return ret;
537 	}
538 
539 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
540 		prandom_seed((__force u32) ifa->ifa_local);
541 		ifap = last_primary;
542 	}
543 
544 	rcu_assign_pointer(ifa->ifa_next, *ifap);
545 	rcu_assign_pointer(*ifap, ifa);
546 
547 	inet_hash_insert(dev_net(in_dev->dev), ifa);
548 
549 	cancel_delayed_work(&check_lifetime_work);
550 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551 
552 	/* Send the message first, then call the notifier.
553 	   The notifier will trigger a FIB update, so that
554 	   netlink listeners will know about the new ifaddr. */
555 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557 
558 	return 0;
559 }
560 
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565 
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	if (!in_dev) {
573 		inet_free_ifa(ifa);
574 		return -ENOBUFS;
575 	}
576 	ipv4_devconf_setall(in_dev);
577 	neigh_parms_data_state_setall(in_dev->arp_parms);
578 	if (ifa->ifa_dev != in_dev) {
579 		WARN_ON(ifa->ifa_dev);
580 		in_dev_hold(in_dev);
581 		ifa->ifa_dev = in_dev;
582 	}
583 	if (ipv4_is_loopback(ifa->ifa_local))
584 		ifa->ifa_scope = RT_SCOPE_HOST;
585 	return inet_insert_ifa(ifa);
586 }
587 
588 /* Caller must hold RCU or RTNL:
589  * We don't take a reference on the found in_device
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593 	struct net_device *dev;
594 	struct in_device *in_dev = NULL;
595 
596 	rcu_read_lock();
597 	dev = dev_get_by_index_rcu(net, ifindex);
598 	if (dev)
599 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600 	rcu_read_unlock();
601 	return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
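/* Editorial note (illustrative sketch, not part of revision a93fbb00): since
 * inetdev_by_index() does not take a reference on the returned in_device, a
 * caller that is not already under RTNL typically inspects it inside one RCU
 * section, e.g.:
 *
 *	rcu_read_lock();
 *	in_dev = inetdev_by_index(net, ifindex);
 *	if (in_dev)
 *		forwarding = IN_DEV_FORWARD(in_dev);
 *	rcu_read_unlock();
 *
 * The local variable "forwarding" above is only illustrative.
 */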
604 
605 /* Called only under the RTNL semaphore. No locks taken. */
606 
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 				    __be32 mask)
609 {
610 	struct in_ifaddr *ifa;
611 
612 	ASSERT_RTNL();
613 
614 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616 			return ifa;
617 	}
618 	return NULL;
619 }
620 
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 				 const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625 	struct ip_mreqn mreq = {
626 		.imr_multiaddr.s_addr = ifa->ifa_address,
627 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628 	};
629 	struct sock *sk = net->ipv4.mc_autojoin_sk;
630 	int ret;
631 
632 	ASSERT_RTNL();
633 
634 	lock_sock(sk);
635 	if (join)
636 		ret = ip_mc_join_group(sk, &mreq);
637 	else
638 		ret = ip_mc_leave_group(sk, &mreq);
639 	release_sock(sk);
640 
641 	return ret;
642 #else
643 	return -EOPNOTSUPP;
644 #endif
645 }
646 
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 			    struct netlink_ext_ack *extack)
649 {
650 	struct net *net = sock_net(skb->sk);
651 	struct in_ifaddr __rcu **ifap;
652 	struct nlattr *tb[IFA_MAX+1];
653 	struct in_device *in_dev;
654 	struct ifaddrmsg *ifm;
655 	struct in_ifaddr *ifa;
656 	int err;
657 
658 	ASSERT_RTNL();
659 
660 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 				     ifa_ipv4_policy, extack);
662 	if (err < 0)
663 		goto errout;
664 
665 	ifm = nlmsg_data(nlh);
666 	in_dev = inetdev_by_index(net, ifm->ifa_index);
667 	if (!in_dev) {
668 		err = -ENODEV;
669 		goto errout;
670 	}
671 
672 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
673 	     ifap = &ifa->ifa_next) {
674 		if (tb[IFA_LOCAL] &&
675 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
676 			continue;
677 
678 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
679 			continue;
680 
681 		if (tb[IFA_ADDRESS] &&
682 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
683 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
684 			continue;
685 
686 		if (ipv4_is_multicast(ifa->ifa_address))
687 			ip_mc_autojoin_config(net, false, ifa);
688 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
689 		return 0;
690 	}
691 
692 	err = -EADDRNOTAVAIL;
693 errout:
694 	return err;
695 }
696 
697 #define INFINITY_LIFE_TIME	0xFFFFFFFF
698 
699 static void check_lifetime(struct work_struct *work)
700 {
701 	unsigned long now, next, next_sec, next_sched;
702 	struct in_ifaddr *ifa;
703 	struct hlist_node *n;
704 	int i;
705 
706 	now = jiffies;
707 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
708 
709 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
710 		bool change_needed = false;
711 
712 		rcu_read_lock();
713 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
714 			unsigned long age;
715 
716 			if (ifa->ifa_flags & IFA_F_PERMANENT)
717 				continue;
718 
719 			/* We try to batch several events at once. */
720 			age = (now - ifa->ifa_tstamp +
721 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
722 
723 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
724 			    age >= ifa->ifa_valid_lft) {
725 				change_needed = true;
726 			} else if (ifa->ifa_preferred_lft ==
727 				   INFINITY_LIFE_TIME) {
728 				continue;
729 			} else if (age >= ifa->ifa_preferred_lft) {
730 				if (time_before(ifa->ifa_tstamp +
731 						ifa->ifa_valid_lft * HZ, next))
732 					next = ifa->ifa_tstamp +
733 					       ifa->ifa_valid_lft * HZ;
734 
735 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
736 					change_needed = true;
737 			} else if (time_before(ifa->ifa_tstamp +
738 					       ifa->ifa_preferred_lft * HZ,
739 					       next)) {
740 				next = ifa->ifa_tstamp +
741 				       ifa->ifa_preferred_lft * HZ;
742 			}
743 		}
744 		rcu_read_unlock();
745 		if (!change_needed)
746 			continue;
747 		rtnl_lock();
748 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
749 			unsigned long age;
750 
751 			if (ifa->ifa_flags & IFA_F_PERMANENT)
752 				continue;
753 
754 			/* We try to batch several events at once. */
755 			age = (now - ifa->ifa_tstamp +
756 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
757 
758 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
759 			    age >= ifa->ifa_valid_lft) {
760 				struct in_ifaddr __rcu **ifap;
761 				struct in_ifaddr *tmp;
762 
763 				ifap = &ifa->ifa_dev->ifa_list;
764 				tmp = rtnl_dereference(*ifap);
765 				while (tmp) {
766 					if (tmp == ifa) {
767 						inet_del_ifa(ifa->ifa_dev,
768 							     ifap, 1);
769 						break;
770 					}
771 					ifap = &tmp->ifa_next;
772 					tmp = rtnl_dereference(*ifap);
773 				}
774 			} else if (ifa->ifa_preferred_lft !=
775 				   INFINITY_LIFE_TIME &&
776 				   age >= ifa->ifa_preferred_lft &&
777 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
778 				ifa->ifa_flags |= IFA_F_DEPRECATED;
779 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
780 			}
781 		}
782 		rtnl_unlock();
783 	}
784 
785 	next_sec = round_jiffies_up(next);
786 	next_sched = next;
787 
788 	/* If rounded timeout is accurate enough, accept it. */
789 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
790 		next_sched = next_sec;
791 
792 	now = jiffies;
793 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
794 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
795 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
796 
797 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
798 			next_sched - now);
799 }
800 
801 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
802 			     __u32 prefered_lft)
803 {
804 	unsigned long timeout;
805 
806 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
807 
808 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
809 	if (addrconf_finite_timeout(timeout))
810 		ifa->ifa_valid_lft = timeout;
811 	else
812 		ifa->ifa_flags |= IFA_F_PERMANENT;
813 
814 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
815 	if (addrconf_finite_timeout(timeout)) {
816 		if (timeout == 0)
817 			ifa->ifa_flags |= IFA_F_DEPRECATED;
818 		ifa->ifa_preferred_lft = timeout;
819 	}
820 	ifa->ifa_tstamp = jiffies;
821 	if (!ifa->ifa_cstamp)
822 		ifa->ifa_cstamp = ifa->ifa_tstamp;
823 }
824 
825 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
826 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
827 				       struct netlink_ext_ack *extack)
828 {
829 	struct nlattr *tb[IFA_MAX+1];
830 	struct in_ifaddr *ifa;
831 	struct ifaddrmsg *ifm;
832 	struct net_device *dev;
833 	struct in_device *in_dev;
834 	int err;
835 
836 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
837 				     ifa_ipv4_policy, extack);
838 	if (err < 0)
839 		goto errout;
840 
841 	ifm = nlmsg_data(nlh);
842 	err = -EINVAL;
843 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
844 		goto errout;
845 
846 	dev = __dev_get_by_index(net, ifm->ifa_index);
847 	err = -ENODEV;
848 	if (!dev)
849 		goto errout;
850 
851 	in_dev = __in_dev_get_rtnl(dev);
852 	err = -ENOBUFS;
853 	if (!in_dev)
854 		goto errout;
855 
856 	ifa = inet_alloc_ifa();
857 	if (!ifa)
858 		/*
859 		 * A potential in_dev allocation can be left alive; it stays
860 		 * assigned to its device and is destroyed with it.
861 		 */
862 		goto errout;
863 
864 	ipv4_devconf_setall(in_dev);
865 	neigh_parms_data_state_setall(in_dev->arp_parms);
866 	in_dev_hold(in_dev);
867 
868 	if (!tb[IFA_ADDRESS])
869 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
870 
871 	INIT_HLIST_NODE(&ifa->hash);
872 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
873 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
874 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
875 					 ifm->ifa_flags;
876 	ifa->ifa_scope = ifm->ifa_scope;
877 	ifa->ifa_dev = in_dev;
878 
879 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
880 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
881 
882 	if (tb[IFA_BROADCAST])
883 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
884 
885 	if (tb[IFA_LABEL])
886 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
887 	else
888 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
889 
890 	if (tb[IFA_RT_PRIORITY])
891 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
892 
893 	if (tb[IFA_PROTO])
894 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
895 
896 	if (tb[IFA_CACHEINFO]) {
897 		struct ifa_cacheinfo *ci;
898 
899 		ci = nla_data(tb[IFA_CACHEINFO]);
900 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
901 			err = -EINVAL;
902 			goto errout_free;
903 		}
904 		*pvalid_lft = ci->ifa_valid;
905 		*pprefered_lft = ci->ifa_prefered;
906 	}
907 
908 	return ifa;
909 
910 errout_free:
911 	inet_free_ifa(ifa);
912 errout:
913 	return ERR_PTR(err);
914 }
915 
916 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
917 {
918 	struct in_device *in_dev = ifa->ifa_dev;
919 	struct in_ifaddr *ifa1;
920 
921 	if (!ifa->ifa_local)
922 		return NULL;
923 
924 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
925 		if (ifa1->ifa_mask == ifa->ifa_mask &&
926 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
927 		    ifa1->ifa_local == ifa->ifa_local)
928 			return ifa1;
929 	}
930 	return NULL;
931 }
932 
933 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
934 			    struct netlink_ext_ack *extack)
935 {
936 	struct net *net = sock_net(skb->sk);
937 	struct in_ifaddr *ifa;
938 	struct in_ifaddr *ifa_existing;
939 	__u32 valid_lft = INFINITY_LIFE_TIME;
940 	__u32 prefered_lft = INFINITY_LIFE_TIME;
941 
942 	ASSERT_RTNL();
943 
944 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
945 	if (IS_ERR(ifa))
946 		return PTR_ERR(ifa);
947 
948 	ifa_existing = find_matching_ifa(ifa);
949 	if (!ifa_existing) {
950 		/* It would be best to check for !NLM_F_CREATE here but
951 		 * userspace already relies on not having to provide this.
952 		 */
953 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
954 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
955 			int ret = ip_mc_autojoin_config(net, true, ifa);
956 
957 			if (ret < 0) {
958 				inet_free_ifa(ifa);
959 				return ret;
960 			}
961 		}
962 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
963 					 extack);
964 	} else {
965 		u32 new_metric = ifa->ifa_rt_priority;
966 
967 		inet_free_ifa(ifa);
968 
969 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
970 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
971 			return -EEXIST;
972 		ifa = ifa_existing;
973 
974 		if (ifa->ifa_rt_priority != new_metric) {
975 			fib_modify_prefix_metric(ifa, new_metric);
976 			ifa->ifa_rt_priority = new_metric;
977 		}
978 
979 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
980 		cancel_delayed_work(&check_lifetime_work);
981 		queue_delayed_work(system_power_efficient_wq,
982 				&check_lifetime_work, 0);
983 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
984 	}
985 	return 0;
986 }
987 
988 /*
989  *	Determine a default network mask, based on the IP address.
990  */
991 
992 static int inet_abc_len(__be32 addr)
993 {
994 	int rc = -1;	/* Something else, probably a multicast. */
995 
996 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
997 		rc = 0;
998 	else {
999 		__u32 haddr = ntohl(addr);
1000 		if (IN_CLASSA(haddr))
1001 			rc = 8;
1002 		else if (IN_CLASSB(haddr))
1003 			rc = 16;
1004 		else if (IN_CLASSC(haddr))
1005 			rc = 24;
1006 		else if (IN_CLASSE(haddr))
1007 			rc = 32;
1008 	}
1009 
1010 	return rc;
1011 }
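/* Editorial note (worked examples, not part of revision a93fbb00): the
 * classful defaults computed above give 8 for 10.1.2.3 (class A), 16 for
 * 172.16.0.1 (class B), 24 for 192.168.1.1 (class C), 32 for a class E
 * address such as 240.0.0.1, 0 for 255.255.255.255 (limited broadcast),
 * and -1 for a class D multicast address such as 224.0.0.1.
 */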
1012 
1013 
1014 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1015 {
1016 	struct sockaddr_in sin_orig;
1017 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1018 	struct in_ifaddr __rcu **ifap = NULL;
1019 	struct in_device *in_dev;
1020 	struct in_ifaddr *ifa = NULL;
1021 	struct net_device *dev;
1022 	char *colon;
1023 	int ret = -EFAULT;
1024 	int tryaddrmatch = 0;
1025 
1026 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1027 
1028 	/* save original address for comparison */
1029 	memcpy(&sin_orig, sin, sizeof(*sin));
1030 
1031 	colon = strchr(ifr->ifr_name, ':');
1032 	if (colon)
1033 		*colon = 0;
1034 
1035 	dev_load(net, ifr->ifr_name);
1036 
1037 	switch (cmd) {
1038 	case SIOCGIFADDR:	/* Get interface address */
1039 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1040 	case SIOCGIFDSTADDR:	/* Get the destination address */
1041 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1042 		/* Note that these ioctls will not sleep,
1043 		   so we do not impose a lock.
1044 		   One day we will be forced to put a shlock here (I mean SMP).
1045 		 */
1046 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1047 		memset(sin, 0, sizeof(*sin));
1048 		sin->sin_family = AF_INET;
1049 		break;
1050 
1051 	case SIOCSIFFLAGS:
1052 		ret = -EPERM;
1053 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1054 			goto out;
1055 		break;
1056 	case SIOCSIFADDR:	/* Set interface address (and family) */
1057 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1058 	case SIOCSIFDSTADDR:	/* Set the destination address */
1059 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1060 		ret = -EPERM;
1061 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1062 			goto out;
1063 		ret = -EINVAL;
1064 		if (sin->sin_family != AF_INET)
1065 			goto out;
1066 		break;
1067 	default:
1068 		ret = -EINVAL;
1069 		goto out;
1070 	}
1071 
1072 	rtnl_lock();
1073 
1074 	ret = -ENODEV;
1075 	dev = __dev_get_by_name(net, ifr->ifr_name);
1076 	if (!dev)
1077 		goto done;
1078 
1079 	if (colon)
1080 		*colon = ':';
1081 
1082 	in_dev = __in_dev_get_rtnl(dev);
1083 	if (in_dev) {
1084 		if (tryaddrmatch) {
1085 			/* Matthias Andree */
1086 			/* compare label and address (4.4BSD style) */
1087 			/* note: we only do this for a limited set of ioctls
1088 			   and only if the original address family was AF_INET.
1089 			   This is checked above. */
1090 
1091 			for (ifap = &in_dev->ifa_list;
1092 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1093 			     ifap = &ifa->ifa_next) {
1094 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1095 				    sin_orig.sin_addr.s_addr ==
1096 							ifa->ifa_local) {
1097 					break; /* found */
1098 				}
1099 			}
1100 		}
1101 		/* We didn't get a match; maybe the application is
1102 		   4.3BSD-style and passed in junk, so we fall back to
1103 		   comparing just the label. */
1104 		if (!ifa) {
1105 			for (ifap = &in_dev->ifa_list;
1106 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1107 			     ifap = &ifa->ifa_next)
1108 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1109 					break;
1110 		}
1111 	}
1112 
1113 	ret = -EADDRNOTAVAIL;
1114 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1115 		goto done;
1116 
1117 	switch (cmd) {
1118 	case SIOCGIFADDR:	/* Get interface address */
1119 		ret = 0;
1120 		sin->sin_addr.s_addr = ifa->ifa_local;
1121 		break;
1122 
1123 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1124 		ret = 0;
1125 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1126 		break;
1127 
1128 	case SIOCGIFDSTADDR:	/* Get the destination address */
1129 		ret = 0;
1130 		sin->sin_addr.s_addr = ifa->ifa_address;
1131 		break;
1132 
1133 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1134 		ret = 0;
1135 		sin->sin_addr.s_addr = ifa->ifa_mask;
1136 		break;
1137 
1138 	case SIOCSIFFLAGS:
1139 		if (colon) {
1140 			ret = -EADDRNOTAVAIL;
1141 			if (!ifa)
1142 				break;
1143 			ret = 0;
1144 			if (!(ifr->ifr_flags & IFF_UP))
1145 				inet_del_ifa(in_dev, ifap, 1);
1146 			break;
1147 		}
1148 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1149 		break;
1150 
1151 	case SIOCSIFADDR:	/* Set interface address (and family) */
1152 		ret = -EINVAL;
1153 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1154 			break;
1155 
1156 		if (!ifa) {
1157 			ret = -ENOBUFS;
1158 			ifa = inet_alloc_ifa();
1159 			if (!ifa)
1160 				break;
1161 			INIT_HLIST_NODE(&ifa->hash);
1162 			if (colon)
1163 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1164 			else
1165 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1166 		} else {
1167 			ret = 0;
1168 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1169 				break;
1170 			inet_del_ifa(in_dev, ifap, 0);
1171 			ifa->ifa_broadcast = 0;
1172 			ifa->ifa_scope = 0;
1173 		}
1174 
1175 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1176 
1177 		if (!(dev->flags & IFF_POINTOPOINT)) {
1178 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1179 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1180 			if ((dev->flags & IFF_BROADCAST) &&
1181 			    ifa->ifa_prefixlen < 31)
1182 				ifa->ifa_broadcast = ifa->ifa_address |
1183 						     ~ifa->ifa_mask;
1184 		} else {
1185 			ifa->ifa_prefixlen = 32;
1186 			ifa->ifa_mask = inet_make_mask(32);
1187 		}
1188 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1189 		ret = inet_set_ifa(dev, ifa);
1190 		break;
1191 
1192 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1193 		ret = 0;
1194 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1195 			inet_del_ifa(in_dev, ifap, 0);
1196 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1197 			inet_insert_ifa(ifa);
1198 		}
1199 		break;
1200 
1201 	case SIOCSIFDSTADDR:	/* Set the destination address */
1202 		ret = 0;
1203 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1204 			break;
1205 		ret = -EINVAL;
1206 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1207 			break;
1208 		ret = 0;
1209 		inet_del_ifa(in_dev, ifap, 0);
1210 		ifa->ifa_address = sin->sin_addr.s_addr;
1211 		inet_insert_ifa(ifa);
1212 		break;
1213 
1214 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1215 
1216 		/*
1217 		 *	The mask we set must be legal.
1218 		 */
1219 		ret = -EINVAL;
1220 		if (bad_mask(sin->sin_addr.s_addr, 0))
1221 			break;
1222 		ret = 0;
1223 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1224 			__be32 old_mask = ifa->ifa_mask;
1225 			inet_del_ifa(in_dev, ifap, 0);
1226 			ifa->ifa_mask = sin->sin_addr.s_addr;
1227 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1228 
1229 			/* If the current broadcast address matches
1230 			 * the old netmask, recalculate the broadcast
1231 			 * address with the new one. Otherwise it's a
1232 			 * funny address, so don't touch it since
1233 			 * the user seems to know what (s)he's doing...
1234 			 */
1235 			if ((dev->flags & IFF_BROADCAST) &&
1236 			    (ifa->ifa_prefixlen < 31) &&
1237 			    (ifa->ifa_broadcast ==
1238 			     (ifa->ifa_local|~old_mask))) {
1239 				ifa->ifa_broadcast = (ifa->ifa_local |
1240 						      ~sin->sin_addr.s_addr);
1241 			}
1242 			inet_insert_ifa(ifa);
1243 		}
1244 		break;
1245 	}
1246 done:
1247 	rtnl_unlock();
1248 out:
1249 	return ret;
1250 }
1251 
1252 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1253 {
1254 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1255 	const struct in_ifaddr *ifa;
1256 	struct ifreq ifr;
1257 	int done = 0;
1258 
1259 	if (WARN_ON(size > sizeof(struct ifreq)))
1260 		goto out;
1261 
1262 	if (!in_dev)
1263 		goto out;
1264 
1265 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1266 		if (!buf) {
1267 			done += size;
1268 			continue;
1269 		}
1270 		if (len < size)
1271 			break;
1272 		memset(&ifr, 0, sizeof(struct ifreq));
1273 		strcpy(ifr.ifr_name, ifa->ifa_label);
1274 
1275 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1276 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1277 								ifa->ifa_local;
1278 
1279 		if (copy_to_user(buf + done, &ifr, size)) {
1280 			done = -EFAULT;
1281 			break;
1282 		}
1283 		len  -= size;
1284 		done += size;
1285 	}
1286 out:
1287 	return done;
1288 }
1289 
1290 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1291 				 int scope)
1292 {
1293 	const struct in_ifaddr *ifa;
1294 
1295 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1296 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1297 			continue;
1298 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1299 		    ifa->ifa_scope <= scope)
1300 			return ifa->ifa_local;
1301 	}
1302 
1303 	return 0;
1304 }
1305 
1306 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1307 {
1308 	const struct in_ifaddr *ifa;
1309 	__be32 addr = 0;
1310 	unsigned char localnet_scope = RT_SCOPE_HOST;
1311 	struct in_device *in_dev;
1312 	struct net *net = dev_net(dev);
1313 	int master_idx;
1314 
1315 	rcu_read_lock();
1316 	in_dev = __in_dev_get_rcu(dev);
1317 	if (!in_dev)
1318 		goto no_in_dev;
1319 
1320 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1321 		localnet_scope = RT_SCOPE_LINK;
1322 
1323 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1324 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1325 			continue;
1326 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1327 			continue;
1328 		if (!dst || inet_ifa_match(dst, ifa)) {
1329 			addr = ifa->ifa_local;
1330 			break;
1331 		}
1332 		if (!addr)
1333 			addr = ifa->ifa_local;
1334 	}
1335 
1336 	if (addr)
1337 		goto out_unlock;
1338 no_in_dev:
1339 	master_idx = l3mdev_master_ifindex_rcu(dev);
1340 
1341 	/* For VRFs, the VRF device takes the place of the loopback device,
1342 	 * with addresses on it being preferred.  Note in such cases the
1343 	 * loopback device will be among the devices that fail the master_idx
1344 	 * equality check in the loop below.
1345 	 */
1346 	if (master_idx &&
1347 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1348 	    (in_dev = __in_dev_get_rcu(dev))) {
1349 		addr = in_dev_select_addr(in_dev, scope);
1350 		if (addr)
1351 			goto out_unlock;
1352 	}
1353 
1354 	/* Non-loopback addresses on the loopback device should be preferred
1355 	   in this case. It is important that lo is the first interface
1356 	   in the dev_base list.
1357 	 */
1358 	for_each_netdev_rcu(net, dev) {
1359 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1360 			continue;
1361 
1362 		in_dev = __in_dev_get_rcu(dev);
1363 		if (!in_dev)
1364 			continue;
1365 
1366 		addr = in_dev_select_addr(in_dev, scope);
1367 		if (addr)
1368 			goto out_unlock;
1369 	}
1370 out_unlock:
1371 	rcu_read_unlock();
1372 	return addr;
1373 }
1374 EXPORT_SYMBOL(inet_select_addr);
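/* Editorial note (illustrative sketch, not part of revision a93fbb00): a
 * caller typically picks a source address for an outgoing packet on a known
 * device like this, assuming "dev" and "daddr" are valid in its context:
 *
 *	__be32 saddr = inet_select_addr(dev, daddr, RT_SCOPE_UNIVERSE);
 *	if (!saddr)
 *		return -EADDRNOTAVAIL;
 *
 * A zero return means no suitable primary address of the requested scope was
 * found on the device (or, after the fallbacks above, in the namespace).
 */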
1375 
1376 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1377 			      __be32 local, int scope)
1378 {
1379 	unsigned char localnet_scope = RT_SCOPE_HOST;
1380 	const struct in_ifaddr *ifa;
1381 	__be32 addr = 0;
1382 	int same = 0;
1383 
1384 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1385 		localnet_scope = RT_SCOPE_LINK;
1386 
1387 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1388 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1389 
1390 		if (!addr &&
1391 		    (local == ifa->ifa_local || !local) &&
1392 		    min_scope <= scope) {
1393 			addr = ifa->ifa_local;
1394 			if (same)
1395 				break;
1396 		}
1397 		if (!same) {
1398 			same = (!local || inet_ifa_match(local, ifa)) &&
1399 				(!dst || inet_ifa_match(dst, ifa));
1400 			if (same && addr) {
1401 				if (local || !dst)
1402 					break;
1403 				/* Is the selected addr in the dst subnet? */
1404 				if (inet_ifa_match(addr, ifa))
1405 					break;
1406 				/* No, then can we use new local src? */
1407 				if (min_scope <= scope) {
1408 					addr = ifa->ifa_local;
1409 					break;
1410 				}
1411 				/* search for large dst subnet for addr */
1412 				same = 0;
1413 			}
1414 		}
1415 	}
1416 
1417 	return same ? addr : 0;
1418 }
1419 
1420 /*
1421  * Confirm that local IP address exists using wildcards:
1422  * - net: netns to check, cannot be NULL
1423  * - in_dev: only on this interface, NULL=any interface
1424  * - dst: only in the same subnet as dst, 0=any dst
1425  * - local: address, 0=autoselect the local address
1426  * - scope: maximum allowed scope value for the local address
1427  */
1428 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1429 			 __be32 dst, __be32 local, int scope)
1430 {
1431 	__be32 addr = 0;
1432 	struct net_device *dev;
1433 
1434 	if (in_dev)
1435 		return confirm_addr_indev(in_dev, dst, local, scope);
1436 
1437 	rcu_read_lock();
1438 	for_each_netdev_rcu(net, dev) {
1439 		in_dev = __in_dev_get_rcu(dev);
1440 		if (in_dev) {
1441 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1442 			if (addr)
1443 				break;
1444 		}
1445 	}
1446 	rcu_read_unlock();
1447 
1448 	return addr;
1449 }
1450 EXPORT_SYMBOL(inet_confirm_addr);
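/* Editorial note (illustrative sketch, not part of revision a93fbb00): two
 * calls showing the wildcard rules documented above; "local", "dst" and
 * "in_dev" are hypothetical caller variables:
 *
 *	exists = inet_confirm_addr(net, NULL, 0, local, RT_SCOPE_HOST);
 *		(does "local" exist on any interface in "net"?)
 *	src    = inet_confirm_addr(net, in_dev, dst, 0, RT_SCOPE_LINK);
 *		(autoselect a source of at most link scope on in_dev,
 *		 inside dst's subnet)
 */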
1451 
1452 /*
1453  *	Device notifier
1454  */
1455 
1456 int register_inetaddr_notifier(struct notifier_block *nb)
1457 {
1458 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1459 }
1460 EXPORT_SYMBOL(register_inetaddr_notifier);
1461 
1462 int unregister_inetaddr_notifier(struct notifier_block *nb)
1463 {
1464 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1465 }
1466 EXPORT_SYMBOL(unregister_inetaddr_notifier);
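/* Editorial note (illustrative sketch, not part of revision a93fbb00): a
 * minimal subscriber to the inetaddr chain above. The callback runs in
 * blocking (process) context and ptr is the struct in_ifaddr being added or
 * removed. The names my_inetaddr_event and my_inetaddr_nb are hypothetical.
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("address added on %s\n",
 *				ifa->ifa_dev->dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_inetaddr_nb = {
 *		.notifier_call = my_inetaddr_event,
 *	};
 *
 * register_inetaddr_notifier(&my_inetaddr_nb) subscribes, typically from a
 * module init path, and unregister_inetaddr_notifier(&my_inetaddr_nb)
 * unsubscribes on exit.
 */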
1467 
1468 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1469 {
1470 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1471 }
1472 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1473 
1474 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1475 {
1476 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1477 	    nb);
1478 }
1479 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1480 
1481 /* Rename ifa_labels for a device name change. Make some effort to preserve
1482  * existing alias numbering and to create unique labels if possible.
1483  */
1484 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1485 {
1486 	struct in_ifaddr *ifa;
1487 	int named = 0;
1488 
1489 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1490 		char old[IFNAMSIZ], *dot;
1491 
1492 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1493 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1494 		if (named++ == 0)
1495 			goto skip;
1496 		dot = strchr(old, ':');
1497 		if (!dot) {
1498 			sprintf(old, ":%d", named);
1499 			dot = old;
1500 		}
1501 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1502 			strcat(ifa->ifa_label, dot);
1503 		else
1504 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1505 skip:
1506 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1507 	}
1508 }
1509 
1510 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1511 					struct in_device *in_dev)
1512 
1513 {
1514 	const struct in_ifaddr *ifa;
1515 
1516 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1517 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1518 			 ifa->ifa_local, dev,
1519 			 ifa->ifa_local, NULL,
1520 			 dev->dev_addr, NULL);
1521 	}
1522 }
1523 
1524 /* Called only under RTNL semaphore */
1525 
1526 static int inetdev_event(struct notifier_block *this, unsigned long event,
1527 			 void *ptr)
1528 {
1529 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1530 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1531 
1532 	ASSERT_RTNL();
1533 
1534 	if (!in_dev) {
1535 		if (event == NETDEV_REGISTER) {
1536 			in_dev = inetdev_init(dev);
1537 			if (IS_ERR(in_dev))
1538 				return notifier_from_errno(PTR_ERR(in_dev));
1539 			if (dev->flags & IFF_LOOPBACK) {
1540 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1541 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1542 			}
1543 		} else if (event == NETDEV_CHANGEMTU) {
1544 			/* Re-enabling IP */
1545 			if (inetdev_valid_mtu(dev->mtu))
1546 				in_dev = inetdev_init(dev);
1547 		}
1548 		goto out;
1549 	}
1550 
1551 	switch (event) {
1552 	case NETDEV_REGISTER:
1553 		pr_debug("%s: bug\n", __func__);
1554 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1555 		break;
1556 	case NETDEV_UP:
1557 		if (!inetdev_valid_mtu(dev->mtu))
1558 			break;
1559 		if (dev->flags & IFF_LOOPBACK) {
1560 			struct in_ifaddr *ifa = inet_alloc_ifa();
1561 
1562 			if (ifa) {
1563 				INIT_HLIST_NODE(&ifa->hash);
1564 				ifa->ifa_local =
1565 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1566 				ifa->ifa_prefixlen = 8;
1567 				ifa->ifa_mask = inet_make_mask(8);
1568 				in_dev_hold(in_dev);
1569 				ifa->ifa_dev = in_dev;
1570 				ifa->ifa_scope = RT_SCOPE_HOST;
1571 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1572 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1573 						 INFINITY_LIFE_TIME);
1574 				ipv4_devconf_setall(in_dev);
1575 				neigh_parms_data_state_setall(in_dev->arp_parms);
1576 				inet_insert_ifa(ifa);
1577 			}
1578 		}
1579 		ip_mc_up(in_dev);
1580 		fallthrough;
1581 	case NETDEV_CHANGEADDR:
1582 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1583 			break;
1584 		fallthrough;
1585 	case NETDEV_NOTIFY_PEERS:
1586 		/* Send gratuitous ARP to notify of link change */
1587 		inetdev_send_gratuitous_arp(dev, in_dev);
1588 		break;
1589 	case NETDEV_DOWN:
1590 		ip_mc_down(in_dev);
1591 		break;
1592 	case NETDEV_PRE_TYPE_CHANGE:
1593 		ip_mc_unmap(in_dev);
1594 		break;
1595 	case NETDEV_POST_TYPE_CHANGE:
1596 		ip_mc_remap(in_dev);
1597 		break;
1598 	case NETDEV_CHANGEMTU:
1599 		if (inetdev_valid_mtu(dev->mtu))
1600 			break;
1601 		/* disable IP when MTU is not enough */
1602 		fallthrough;
1603 	case NETDEV_UNREGISTER:
1604 		inetdev_destroy(in_dev);
1605 		break;
1606 	case NETDEV_CHANGENAME:
1607 		/* Do not notify about the label change; this event is
1608 		 * not interesting to applications using netlink.
1609 		 */
1610 		inetdev_changename(dev, in_dev);
1611 
1612 		devinet_sysctl_unregister(in_dev);
1613 		devinet_sysctl_register(in_dev);
1614 		break;
1615 	}
1616 out:
1617 	return NOTIFY_DONE;
1618 }
1619 
1620 static struct notifier_block ip_netdev_notifier = {
1621 	.notifier_call = inetdev_event,
1622 };
1623 
1624 static size_t inet_nlmsg_size(void)
1625 {
1626 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1627 	       + nla_total_size(4) /* IFA_ADDRESS */
1628 	       + nla_total_size(4) /* IFA_LOCAL */
1629 	       + nla_total_size(4) /* IFA_BROADCAST */
1630 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1631 	       + nla_total_size(4)  /* IFA_FLAGS */
1632 	       + nla_total_size(1)  /* IFA_PROTO */
1633 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1634 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1635 }
1636 
1637 static inline u32 cstamp_delta(unsigned long cstamp)
1638 {
1639 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1640 }
1641 
1642 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1643 			 unsigned long tstamp, u32 preferred, u32 valid)
1644 {
1645 	struct ifa_cacheinfo ci;
1646 
1647 	ci.cstamp = cstamp_delta(cstamp);
1648 	ci.tstamp = cstamp_delta(tstamp);
1649 	ci.ifa_prefered = preferred;
1650 	ci.ifa_valid = valid;
1651 
1652 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1653 }
1654 
1655 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1656 			    struct inet_fill_args *args)
1657 {
1658 	struct ifaddrmsg *ifm;
1659 	struct nlmsghdr  *nlh;
1660 	u32 preferred, valid;
1661 
1662 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1663 			args->flags);
1664 	if (!nlh)
1665 		return -EMSGSIZE;
1666 
1667 	ifm = nlmsg_data(nlh);
1668 	ifm->ifa_family = AF_INET;
1669 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1670 	ifm->ifa_flags = ifa->ifa_flags;
1671 	ifm->ifa_scope = ifa->ifa_scope;
1672 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1673 
1674 	if (args->netnsid >= 0 &&
1675 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1676 		goto nla_put_failure;
1677 
1678 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1679 		preferred = ifa->ifa_preferred_lft;
1680 		valid = ifa->ifa_valid_lft;
1681 		if (preferred != INFINITY_LIFE_TIME) {
1682 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1683 
1684 			if (preferred > tval)
1685 				preferred -= tval;
1686 			else
1687 				preferred = 0;
1688 			if (valid != INFINITY_LIFE_TIME) {
1689 				if (valid > tval)
1690 					valid -= tval;
1691 				else
1692 					valid = 0;
1693 			}
1694 		}
1695 	} else {
1696 		preferred = INFINITY_LIFE_TIME;
1697 		valid = INFINITY_LIFE_TIME;
1698 	}
1699 	if ((ifa->ifa_address &&
1700 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1701 	    (ifa->ifa_local &&
1702 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1703 	    (ifa->ifa_broadcast &&
1704 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1705 	    (ifa->ifa_label[0] &&
1706 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1707 	    (ifa->ifa_proto &&
1708 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1709 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1710 	    (ifa->ifa_rt_priority &&
1711 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1712 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1713 			  preferred, valid))
1714 		goto nla_put_failure;
1715 
1716 	nlmsg_end(skb, nlh);
1717 	return 0;
1718 
1719 nla_put_failure:
1720 	nlmsg_cancel(skb, nlh);
1721 	return -EMSGSIZE;
1722 }
1723 
1724 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1725 				      struct inet_fill_args *fillargs,
1726 				      struct net **tgt_net, struct sock *sk,
1727 				      struct netlink_callback *cb)
1728 {
1729 	struct netlink_ext_ack *extack = cb->extack;
1730 	struct nlattr *tb[IFA_MAX+1];
1731 	struct ifaddrmsg *ifm;
1732 	int err, i;
1733 
1734 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1735 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1736 		return -EINVAL;
1737 	}
1738 
1739 	ifm = nlmsg_data(nlh);
1740 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1741 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1742 		return -EINVAL;
1743 	}
1744 
1745 	fillargs->ifindex = ifm->ifa_index;
1746 	if (fillargs->ifindex) {
1747 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1748 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1749 	}
1750 
1751 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1752 					    ifa_ipv4_policy, extack);
1753 	if (err < 0)
1754 		return err;
1755 
1756 	for (i = 0; i <= IFA_MAX; ++i) {
1757 		if (!tb[i])
1758 			continue;
1759 
1760 		if (i == IFA_TARGET_NETNSID) {
1761 			struct net *net;
1762 
1763 			fillargs->netnsid = nla_get_s32(tb[i]);
1764 
1765 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1766 			if (IS_ERR(net)) {
1767 				fillargs->netnsid = -1;
1768 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1769 				return PTR_ERR(net);
1770 			}
1771 			*tgt_net = net;
1772 		} else {
1773 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1774 			return -EINVAL;
1775 		}
1776 	}
1777 
1778 	return 0;
1779 }
1780 
1781 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1782 			    struct netlink_callback *cb, int s_ip_idx,
1783 			    struct inet_fill_args *fillargs)
1784 {
1785 	struct in_ifaddr *ifa;
1786 	int ip_idx = 0;
1787 	int err;
1788 
1789 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1790 		if (ip_idx < s_ip_idx) {
1791 			ip_idx++;
1792 			continue;
1793 		}
1794 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1795 		if (err < 0)
1796 			goto done;
1797 
1798 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1799 		ip_idx++;
1800 	}
1801 	err = 0;
1802 
1803 done:
1804 	cb->args[2] = ip_idx;
1805 
1806 	return err;
1807 }
1808 
1809 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1810 {
1811 	const struct nlmsghdr *nlh = cb->nlh;
1812 	struct inet_fill_args fillargs = {
1813 		.portid = NETLINK_CB(cb->skb).portid,
1814 		.seq = nlh->nlmsg_seq,
1815 		.event = RTM_NEWADDR,
1816 		.flags = NLM_F_MULTI,
1817 		.netnsid = -1,
1818 	};
1819 	struct net *net = sock_net(skb->sk);
1820 	struct net *tgt_net = net;
1821 	int h, s_h;
1822 	int idx, s_idx;
1823 	int s_ip_idx;
1824 	struct net_device *dev;
1825 	struct in_device *in_dev;
1826 	struct hlist_head *head;
1827 	int err = 0;
1828 
1829 	s_h = cb->args[0];
1830 	s_idx = idx = cb->args[1];
1831 	s_ip_idx = cb->args[2];
1832 
1833 	if (cb->strict_check) {
1834 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1835 						 skb->sk, cb);
1836 		if (err < 0)
1837 			goto put_tgt_net;
1838 
1839 		err = 0;
1840 		if (fillargs.ifindex) {
1841 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1842 			if (!dev) {
1843 				err = -ENODEV;
1844 				goto put_tgt_net;
1845 			}
1846 
1847 			in_dev = __in_dev_get_rtnl(dev);
1848 			if (in_dev) {
1849 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1850 						       &fillargs);
1851 			}
1852 			goto put_tgt_net;
1853 		}
1854 	}
1855 
1856 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1857 		idx = 0;
1858 		head = &tgt_net->dev_index_head[h];
1859 		rcu_read_lock();
1860 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1861 			  tgt_net->dev_base_seq;
1862 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1863 			if (idx < s_idx)
1864 				goto cont;
1865 			if (h > s_h || idx > s_idx)
1866 				s_ip_idx = 0;
1867 			in_dev = __in_dev_get_rcu(dev);
1868 			if (!in_dev)
1869 				goto cont;
1870 
1871 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1872 					       &fillargs);
1873 			if (err < 0) {
1874 				rcu_read_unlock();
1875 				goto done;
1876 			}
1877 cont:
1878 			idx++;
1879 		}
1880 		rcu_read_unlock();
1881 	}
1882 
1883 done:
1884 	cb->args[0] = h;
1885 	cb->args[1] = idx;
1886 put_tgt_net:
1887 	if (fillargs.netnsid >= 0)
1888 		put_net(tgt_net);
1889 
1890 	return skb->len ? : err;
1891 }
1892 
1893 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1894 		      u32 portid)
1895 {
1896 	struct inet_fill_args fillargs = {
1897 		.portid = portid,
1898 		.seq = nlh ? nlh->nlmsg_seq : 0,
1899 		.event = event,
1900 		.flags = 0,
1901 		.netnsid = -1,
1902 	};
1903 	struct sk_buff *skb;
1904 	int err = -ENOBUFS;
1905 	struct net *net;
1906 
1907 	net = dev_net(ifa->ifa_dev->dev);
1908 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1909 	if (!skb)
1910 		goto errout;
1911 
1912 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1913 	if (err < 0) {
1914 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1915 		WARN_ON(err == -EMSGSIZE);
1916 		kfree_skb(skb);
1917 		goto errout;
1918 	}
1919 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1920 	return;
1921 errout:
1922 	if (err < 0)
1923 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1924 }
1925 
1926 static size_t inet_get_link_af_size(const struct net_device *dev,
1927 				    u32 ext_filter_mask)
1928 {
1929 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1930 
1931 	if (!in_dev)
1932 		return 0;
1933 
1934 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1935 }
1936 
1937 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1938 			     u32 ext_filter_mask)
1939 {
1940 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1941 	struct nlattr *nla;
1942 	int i;
1943 
1944 	if (!in_dev)
1945 		return -ENODATA;
1946 
1947 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1948 	if (!nla)
1949 		return -EMSGSIZE;
1950 
1951 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1952 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1953 
1954 	return 0;
1955 }
1956 
1957 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1958 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1959 };
1960 
1961 static int inet_validate_link_af(const struct net_device *dev,
1962 				 const struct nlattr *nla,
1963 				 struct netlink_ext_ack *extack)
1964 {
1965 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1966 	int err, rem;
1967 
1968 	if (dev && !__in_dev_get_rtnl(dev))
1969 		return -EAFNOSUPPORT;
1970 
1971 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1972 					  inet_af_policy, extack);
1973 	if (err < 0)
1974 		return err;
1975 
1976 	if (tb[IFLA_INET_CONF]) {
1977 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1978 			int cfgid = nla_type(a);
1979 
1980 			if (nla_len(a) < 4)
1981 				return -EINVAL;
1982 
1983 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1984 				return -EINVAL;
1985 		}
1986 	}
1987 
1988 	return 0;
1989 }
1990 
1991 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1992 			    struct netlink_ext_ack *extack)
1993 {
1994 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1995 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1996 	int rem;
1997 
1998 	if (!in_dev)
1999 		return -EAFNOSUPPORT;
2000 
2001 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2002 		return -EINVAL;
2003 
2004 	if (tb[IFLA_INET_CONF]) {
2005 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2006 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2007 	}
2008 
2009 	return 0;
2010 }
2011 
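/*
 * Worst-case payload size of an RTM_NEWNETCONF message carrying @type,
 * or every attribute when @type is NETCONFA_ALL.  Must stay in sync with
 * inet_netconf_fill_devconf(); callers treat -EMSGSIZE from the fill as
 * a bug (see the WARN_ON()s below).
 */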
2012 static int inet_netconf_msgsize_devconf(int type)
2013 {
2014 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2015 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2016 	bool all = false;
2017 
2018 	if (type == NETCONFA_ALL)
2019 		all = true;
2020 
2021 	if (all || type == NETCONFA_FORWARDING)
2022 		size += nla_total_size(4);
2023 	if (all || type == NETCONFA_RP_FILTER)
2024 		size += nla_total_size(4);
2025 	if (all || type == NETCONFA_MC_FORWARDING)
2026 		size += nla_total_size(4);
2027 	if (all || type == NETCONFA_BC_FORWARDING)
2028 		size += nla_total_size(4);
2029 	if (all || type == NETCONFA_PROXY_NEIGH)
2030 		size += nla_total_size(4);
2031 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2032 		size += nla_total_size(4);
2033 
2034 	return size;
2035 }
2036 
2037 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2038 				     struct ipv4_devconf *devconf, u32 portid,
2039 				     u32 seq, int event, unsigned int flags,
2040 				     int type)
2041 {
2042 	struct nlmsghdr  *nlh;
2043 	struct netconfmsg *ncm;
2044 	bool all = false;
2045 
2046 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2047 			flags);
2048 	if (!nlh)
2049 		return -EMSGSIZE;
2050 
2051 	if (type == NETCONFA_ALL)
2052 		all = true;
2053 
2054 	ncm = nlmsg_data(nlh);
2055 	ncm->ncm_family = AF_INET;
2056 
2057 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2058 		goto nla_put_failure;
2059 
2060 	if (!devconf)
2061 		goto out;
2062 
2063 	if ((all || type == NETCONFA_FORWARDING) &&
2064 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2065 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2066 		goto nla_put_failure;
2067 	if ((all || type == NETCONFA_RP_FILTER) &&
2068 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2069 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2070 		goto nla_put_failure;
2071 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2072 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2073 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2074 		goto nla_put_failure;
2075 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2076 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2077 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2078 		goto nla_put_failure;
2079 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2080 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2081 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2082 		goto nla_put_failure;
2083 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2084 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2085 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2086 		goto nla_put_failure;
2087 
2088 out:
2089 	nlmsg_end(skb, nlh);
2090 	return 0;
2091 
2092 nla_put_failure:
2093 	nlmsg_cancel(skb, nlh);
2094 	return -EMSGSIZE;
2095 }
2096 
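/*
 * Multicast an RTM_NEWNETCONF (or RTM_DELNETCONF) update for @type on
 * @ifindex to RTNLGRP_IPV4_NETCONF listeners.  A NULL @devconf, as used
 * for RTM_DELNETCONF, yields a message carrying only the ifindex.
 */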
2097 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2098 				 int ifindex, struct ipv4_devconf *devconf)
2099 {
2100 	struct sk_buff *skb;
2101 	int err = -ENOBUFS;
2102 
2103 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2104 	if (!skb)
2105 		goto errout;
2106 
2107 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2108 					event, 0, type);
2109 	if (err < 0) {
2110 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2111 		WARN_ON(err == -EMSGSIZE);
2112 		kfree_skb(skb);
2113 		goto errout;
2114 	}
2115 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2116 	return;
2117 errout:
2118 	if (err < 0)
2119 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2120 }
2121 
2122 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2123 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2124 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2125 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2126 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2127 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2128 };
2129 
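/*
 * Validate an RTM_GETNETCONF request.  Sockets that have not opted in to
 * strict checking get the legacy, lenient parse; strict sockets are
 * additionally refused any attribute other than NETCONFA_IFINDEX.
 */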
2130 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2131 				      const struct nlmsghdr *nlh,
2132 				      struct nlattr **tb,
2133 				      struct netlink_ext_ack *extack)
2134 {
2135 	int i, err;
2136 
2137 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2138 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2139 		return -EINVAL;
2140 	}
2141 
2142 	if (!netlink_strict_get_check(skb))
2143 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2144 					      tb, NETCONFA_MAX,
2145 					      devconf_ipv4_policy, extack);
2146 
2147 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2148 					    tb, NETCONFA_MAX,
2149 					    devconf_ipv4_policy, extack);
2150 	if (err)
2151 		return err;
2152 
2153 	for (i = 0; i <= NETCONFA_MAX; i++) {
2154 		if (!tb[i])
2155 			continue;
2156 
2157 		switch (i) {
2158 		case NETCONFA_IFINDEX:
2159 			break;
2160 		default:
2161 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2162 			return -EINVAL;
2163 		}
2164 	}
2165 
2166 	return 0;
2167 }
2168 
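/*
 * Doit handler for RTM_GETNETCONF.  NETCONFA_IFINDEX selects a device,
 * or one of the pseudo indexes NETCONFA_IFINDEX_ALL /
 * NETCONFA_IFINDEX_DEFAULT for the net-wide "all" and "default" tables;
 * the reply is a single RTM_NEWNETCONF carrying NETCONFA_ALL.  This is
 * the request issued by, e.g., iproute2's "ip netconf show dev eth0".
 */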
2169 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2170 				    struct nlmsghdr *nlh,
2171 				    struct netlink_ext_ack *extack)
2172 {
2173 	struct net *net = sock_net(in_skb->sk);
2174 	struct nlattr *tb[NETCONFA_MAX+1];
2175 	struct sk_buff *skb;
2176 	struct ipv4_devconf *devconf;
2177 	struct in_device *in_dev;
2178 	struct net_device *dev;
2179 	int ifindex;
2180 	int err;
2181 
2182 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2183 	if (err)
2184 		goto errout;
2185 
2186 	err = -EINVAL;
2187 	if (!tb[NETCONFA_IFINDEX])
2188 		goto errout;
2189 
2190 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2191 	switch (ifindex) {
2192 	case NETCONFA_IFINDEX_ALL:
2193 		devconf = net->ipv4.devconf_all;
2194 		break;
2195 	case NETCONFA_IFINDEX_DEFAULT:
2196 		devconf = net->ipv4.devconf_dflt;
2197 		break;
2198 	default:
2199 		dev = __dev_get_by_index(net, ifindex);
2200 		if (!dev)
2201 			goto errout;
2202 		in_dev = __in_dev_get_rtnl(dev);
2203 		if (!in_dev)
2204 			goto errout;
2205 		devconf = &in_dev->cnf;
2206 		break;
2207 	}
2208 
2209 	err = -ENOBUFS;
2210 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2211 	if (!skb)
2212 		goto errout;
2213 
2214 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2215 					NETLINK_CB(in_skb).portid,
2216 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2217 					NETCONFA_ALL);
2218 	if (err < 0) {
2219 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2220 		WARN_ON(err == -EMSGSIZE);
2221 		kfree_skb(skb);
2222 		goto errout;
2223 	}
2224 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2225 errout:
2226 	return err;
2227 }
2228 
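/*
 * Dumpit handler for RTM_GETNETCONF: walk the per-netns device hash
 * under RCU, emitting one RTM_NEWNETCONF per in_device, then append two
 * pseudo entries for NETCONFA_IFINDEX_ALL (h == NETDEV_HASHENTRIES) and
 * NETCONFA_IFINDEX_DEFAULT (h == NETDEV_HASHENTRIES + 1).  cb->args[0]
 * and cb->args[1] record the bucket and device index so an interrupted
 * dump resumes where it stopped.
 */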
2229 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2230 				     struct netlink_callback *cb)
2231 {
2232 	const struct nlmsghdr *nlh = cb->nlh;
2233 	struct net *net = sock_net(skb->sk);
2234 	int h, s_h;
2235 	int idx, s_idx;
2236 	struct net_device *dev;
2237 	struct in_device *in_dev;
2238 	struct hlist_head *head;
2239 
2240 	if (cb->strict_check) {
2241 		struct netlink_ext_ack *extack = cb->extack;
2242 		struct netconfmsg *ncm;
2243 
2244 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2245 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2246 			return -EINVAL;
2247 		}
2248 
2249 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2250 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2251 			return -EINVAL;
2252 		}
2253 	}
2254 
2255 	s_h = cb->args[0];
2256 	s_idx = idx = cb->args[1];
2257 
2258 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2259 		idx = 0;
2260 		head = &net->dev_index_head[h];
2261 		rcu_read_lock();
2262 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2263 			  net->dev_base_seq;
2264 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2265 			if (idx < s_idx)
2266 				goto cont;
2267 			in_dev = __in_dev_get_rcu(dev);
2268 			if (!in_dev)
2269 				goto cont;
2270 
2271 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2272 						      &in_dev->cnf,
2273 						      NETLINK_CB(cb->skb).portid,
2274 						      nlh->nlmsg_seq,
2275 						      RTM_NEWNETCONF,
2276 						      NLM_F_MULTI,
2277 						      NETCONFA_ALL) < 0) {
2278 				rcu_read_unlock();
2279 				goto done;
2280 			}
2281 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2282 cont:
2283 			idx++;
2284 		}
2285 		rcu_read_unlock();
2286 	}
2287 	if (h == NETDEV_HASHENTRIES) {
2288 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2289 					      net->ipv4.devconf_all,
2290 					      NETLINK_CB(cb->skb).portid,
2291 					      nlh->nlmsg_seq,
2292 					      RTM_NEWNETCONF, NLM_F_MULTI,
2293 					      NETCONFA_ALL) < 0)
2294 			goto done;
2295 		else
2296 			h++;
2297 	}
2298 	if (h == NETDEV_HASHENTRIES + 1) {
2299 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2300 					      net->ipv4.devconf_dflt,
2301 					      NETLINK_CB(cb->skb).portid,
2302 					      nlh->nlmsg_seq,
2303 					      RTM_NEWNETCONF, NLM_F_MULTI,
2304 					      NETCONFA_ALL) < 0)
2305 			goto done;
2306 		else
2307 			h++;
2308 	}
2309 done:
2310 	cb->args[0] = h;
2311 	cb->args[1] = idx;
2312 
2313 	return skb->len;
2314 }
2315 
2316 #ifdef CONFIG_SYSCTL
2317 
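/*
 * Propagate a changed "default" value to every in_device whose entry @i
 * has not been written explicitly (bit @i clear in cnf.state; the bit is
 * set by the set_bit() in devinet_conf_proc() below).
 */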
2318 static void devinet_copy_dflt_conf(struct net *net, int i)
2319 {
2320 	struct net_device *dev;
2321 
2322 	rcu_read_lock();
2323 	for_each_netdev_rcu(net, dev) {
2324 		struct in_device *in_dev;
2325 
2326 		in_dev = __in_dev_get_rcu(dev);
2327 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2328 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2329 	}
2330 	rcu_read_unlock();
2331 }
2332 
2333 /* called with RTNL locked */
2334 static void inet_forward_change(struct net *net)
2335 {
2336 	struct net_device *dev;
2337 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2338 
2339 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2340 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2341 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2342 				    NETCONFA_FORWARDING,
2343 				    NETCONFA_IFINDEX_ALL,
2344 				    net->ipv4.devconf_all);
2345 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2346 				    NETCONFA_FORWARDING,
2347 				    NETCONFA_IFINDEX_DEFAULT,
2348 				    net->ipv4.devconf_dflt);
2349 
2350 	for_each_netdev(net, dev) {
2351 		struct in_device *in_dev;
2352 
2353 		if (on)
2354 			dev_disable_lro(dev);
2355 
2356 		in_dev = __in_dev_get_rtnl(dev);
2357 		if (in_dev) {
2358 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2359 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2360 						    NETCONFA_FORWARDING,
2361 						    dev->ifindex, &in_dev->cnf);
2362 		}
2363 	}
2364 }
2365 
2366 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2367 {
2368 	if (cnf == net->ipv4.devconf_dflt)
2369 		return NETCONFA_IFINDEX_DEFAULT;
2370 	else if (cnf == net->ipv4.devconf_all)
2371 		return NETCONFA_IFINDEX_ALL;
2372 	else {
2373 		struct in_device *idev
2374 			= container_of(cnf, struct in_device, cnf);
2375 		return idev->dev->ifindex;
2376 	}
2377 }
2378 
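/*
 * Generic proc handler for the per-interface entries.  The entry index
 * is recovered from the .data pointer, marked as explicitly configured
 * in cnf.state and, for the "default" table, propagated to unconfigured
 * devices.  Clearing accept_local/route_localnet or toggling
 * bc_forwarding flushes the route cache; rp_filter, proxy_arp and
 * ignore_routes_with_linkdown changes are announced over netconf.
 */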
2379 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2380 			     void *buffer, size_t *lenp, loff_t *ppos)
2381 {
2382 	int old_value = *(int *)ctl->data;
2383 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2384 	int new_value = *(int *)ctl->data;
2385 
2386 	if (write) {
2387 		struct ipv4_devconf *cnf = ctl->extra1;
2388 		struct net *net = ctl->extra2;
2389 		int i = (int *)ctl->data - cnf->data;
2390 		int ifindex;
2391 
2392 		set_bit(i, cnf->state);
2393 
2394 		if (cnf == net->ipv4.devconf_dflt)
2395 			devinet_copy_dflt_conf(net, i);
2396 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2397 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2398 			if ((new_value == 0) && (old_value != 0))
2399 				rt_cache_flush(net);
2400 
2401 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2402 		    new_value != old_value)
2403 			rt_cache_flush(net);
2404 
2405 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2406 		    new_value != old_value) {
2407 			ifindex = devinet_conf_ifindex(net, cnf);
2408 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2409 						    NETCONFA_RP_FILTER,
2410 						    ifindex, cnf);
2411 		}
2412 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2413 		    new_value != old_value) {
2414 			ifindex = devinet_conf_ifindex(net, cnf);
2415 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2416 						    NETCONFA_PROXY_NEIGH,
2417 						    ifindex, cnf);
2418 		}
2419 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2420 		    new_value != old_value) {
2421 			ifindex = devinet_conf_ifindex(net, cnf);
2422 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2423 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2424 						    ifindex, cnf);
2425 		}
2426 	}
2427 
2428 	return ret;
2429 }
2430 
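/*
 * Write handler for the "forwarding" entries.  Requires CAP_NET_ADMIN in
 * the owning user namespace.  Except for the per-netns "default" entry,
 * applying a change needs the RTNL: rtnl_trylock() is used and, if it
 * fails, the old value is restored and the syscall restarted rather than
 * blocking here.  A change of "all" is fanned out to every device via
 * inet_forward_change(), and the route cache is flushed.
 */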
2431 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2432 				  void *buffer, size_t *lenp, loff_t *ppos)
2433 {
2434 	int *valp = ctl->data;
2435 	int val = *valp;
2436 	loff_t pos = *ppos;
2437 	struct net *net = ctl->extra2;
2438 	int ret;
2439 
2440 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2441 		return -EPERM;
2442 
2443 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2444 
2445 	if (write && *valp != val) {
2446 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2447 			if (!rtnl_trylock()) {
2448 				/* Restore the original values before restarting */
2449 				*valp = val;
2450 				*ppos = pos;
2451 				return restart_syscall();
2452 			}
2453 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2454 				inet_forward_change(net);
2455 			} else {
2456 				struct ipv4_devconf *cnf = ctl->extra1;
2457 				struct in_device *idev =
2458 					container_of(cnf, struct in_device, cnf);
2459 				if (*valp)
2460 					dev_disable_lro(idev->dev);
2461 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2462 							    NETCONFA_FORWARDING,
2463 							    idev->dev->ifindex,
2464 							    cnf);
2465 			}
2466 			rtnl_unlock();
2467 			rt_cache_flush(net);
2468 		} else
2469 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2470 						    NETCONFA_FORWARDING,
2471 						    NETCONFA_IFINDEX_DEFAULT,
2472 						    net->ipv4.devconf_dflt);
2473 	}
2474 
2475 	return ret;
2476 }
2477 
2478 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2479 				void *buffer, size_t *lenp, loff_t *ppos)
2480 {
2481 	int *valp = ctl->data;
2482 	int val = *valp;
2483 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2484 	struct net *net = ctl->extra2;
2485 
2486 	if (write && *valp != val)
2487 		rt_cache_flush(net);
2488 
2489 	return ret;
2490 }
2491 
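/*
 * Template entries for the per-interface trees under
 * /proc/sys/net/ipv4/conf/.  As an illustration,
 * DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter") expands to roughly:
 *
 *	{
 *		.procname	= "rp_filter",
 *		.data		= ipv4_devconf.data + IPV4_DEVCONF_RP_FILTER - 1,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= devinet_conf_proc,
 *		.extra1		= &ipv4_devconf,
 *	}
 *
 * __devinet_sysctl_register() below rebases .data and .extra1 onto the
 * per-netns/per-device ipv4_devconf copy before registering the table.
 */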
2492 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2493 	{ \
2494 		.procname	= name, \
2495 		.data		= ipv4_devconf.data + \
2496 				  IPV4_DEVCONF_ ## attr - 1, \
2497 		.maxlen		= sizeof(int), \
2498 		.mode		= mval, \
2499 		.proc_handler	= proc, \
2500 		.extra1		= &ipv4_devconf, \
2501 	}
2502 
2503 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2504 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2505 
2506 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2507 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2508 
2509 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2510 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2511 
2512 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2513 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2514 
2515 static struct devinet_sysctl_table {
2516 	struct ctl_table_header *sysctl_header;
2517 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2518 } devinet_sysctl = {
2519 	.devinet_vars = {
2520 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2521 					     devinet_sysctl_forward),
2522 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2523 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2524 
2525 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2526 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2527 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2528 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2529 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2530 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2531 					"accept_source_route"),
2532 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2533 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2534 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2535 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2536 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2537 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2538 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2539 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2540 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2541 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2542 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2543 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2544 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2545 					"arp_evict_nocarrier"),
2546 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2547 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2548 					"force_igmp_version"),
2549 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2550 					"igmpv2_unsolicited_report_interval"),
2551 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2552 					"igmpv3_unsolicited_report_interval"),
2553 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2554 					"ignore_routes_with_linkdown"),
2555 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2556 					"drop_gratuitous_arp"),
2557 
2558 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2559 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2560 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2561 					      "promote_secondaries"),
2562 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2563 					      "route_localnet"),
2564 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2565 					      "drop_unicast_in_l2_multicast"),
2566 	},
2567 };
2568 
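/*
 * Clone the devinet_sysctl template, rebase each entry's .data (and
 * .extra1/.extra2) from the static ipv4_devconf onto @p and @net, and
 * register the result under "net/ipv4/conf/<dev_name>".  The last slot
 * is the zero terminator and is skipped (ARRAY_SIZE - 1).  Registration
 * is announced with an RTM_NEWNETCONF / NETCONFA_ALL notification.
 */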
2569 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2570 				     int ifindex, struct ipv4_devconf *p)
2571 {
2572 	int i;
2573 	struct devinet_sysctl_table *t;
2574 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2575 
2576 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2577 	if (!t)
2578 		goto out;
2579 
2580 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2581 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2582 		t->devinet_vars[i].extra1 = p;
2583 		t->devinet_vars[i].extra2 = net;
2584 	}
2585 
2586 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2587 
2588 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2589 	if (!t->sysctl_header)
2590 		goto free;
2591 
2592 	p->sysctl = t;
2593 
2594 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2595 				    ifindex, p);
2596 	return 0;
2597 
2598 free:
2599 	kfree(t);
2600 out:
2601 	return -ENOMEM;
2602 }
2603 
2604 static void __devinet_sysctl_unregister(struct net *net,
2605 					struct ipv4_devconf *cnf, int ifindex)
2606 {
2607 	struct devinet_sysctl_table *t = cnf->sysctl;
2608 
2609 	if (t) {
2610 		cnf->sysctl = NULL;
2611 		unregister_net_sysctl_table(t->sysctl_header);
2612 		kfree(t);
2613 	}
2614 
2615 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2616 }
2617 
2618 static int devinet_sysctl_register(struct in_device *idev)
2619 {
2620 	int err;
2621 
2622 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2623 		return -EINVAL;
2624 
2625 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2626 	if (err)
2627 		return err;
2628 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2629 					idev->dev->ifindex, &idev->cnf);
2630 	if (err)
2631 		neigh_sysctl_unregister(idev->arp_parms);
2632 	return err;
2633 }
2634 
2635 static void devinet_sysctl_unregister(struct in_device *idev)
2636 {
2637 	struct net *net = dev_net(idev->dev);
2638 
2639 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2640 	neigh_sysctl_unregister(idev->arp_parms);
2641 }
2642 
2643 static struct ctl_table ctl_forward_entry[] = {
2644 	{
2645 		.procname	= "ip_forward",
2646 		.data		= &ipv4_devconf.data[
2647 					IPV4_DEVCONF_FORWARDING - 1],
2648 		.maxlen		= sizeof(int),
2649 		.mode		= 0644,
2650 		.proc_handler	= devinet_sysctl_forward,
2651 		.extra1		= &ipv4_devconf,
2652 		.extra2		= &init_net,
2653 	},
2654 	{ },
2655 };
2656 #endif
2657 
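/*
 * Per-netns setup: allocate this netns' "all" and "default" devconf
 * tables, seeding them from the compiled-in defaults, from init_net, or
 * from the netns of the creating process depending on
 * sysctl_devconf_inherit_init_net (2 = keep defaults, 3 = copy from the
 * current netns, otherwise copy from init_net), then register the
 * conf/all, conf/default and net.ipv4.ip_forward sysctls for this netns.
 */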
2658 static __net_init int devinet_init_net(struct net *net)
2659 {
2660 	int err;
2661 	struct ipv4_devconf *all, *dflt;
2662 #ifdef CONFIG_SYSCTL
2663 	struct ctl_table *tbl;
2664 	struct ctl_table_header *forw_hdr;
2665 #endif
2666 
2667 	err = -ENOMEM;
2668 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2669 	if (!all)
2670 		goto err_alloc_all;
2671 
2672 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2673 	if (!dflt)
2674 		goto err_alloc_dflt;
2675 
2676 #ifdef CONFIG_SYSCTL
2677 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2678 	if (!tbl)
2679 		goto err_alloc_ctl;
2680 
2681 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2682 	tbl[0].extra1 = all;
2683 	tbl[0].extra2 = net;
2684 #endif
2685 
2686 	if (!net_eq(net, &init_net)) {
2687 		if (IS_ENABLED(CONFIG_SYSCTL) &&
2688 		    sysctl_devconf_inherit_init_net == 3) {
2689 			/* copy from the current netns */
2690 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2691 			       sizeof(ipv4_devconf));
2692 			memcpy(dflt,
2693 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2694 			       sizeof(ipv4_devconf_dflt));
2695 		} else if (!IS_ENABLED(CONFIG_SYSCTL) ||
2696 			   sysctl_devconf_inherit_init_net != 2) {
2697 			/* inherit == 0 or 1: copy from init_net */
2698 			memcpy(all, init_net.ipv4.devconf_all,
2699 			       sizeof(ipv4_devconf));
2700 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2701 			       sizeof(ipv4_devconf_dflt));
2702 		}
2703 		/* else inherit == 2: use compiled values */
2704 	}
2705 
2706 #ifdef CONFIG_SYSCTL
2707 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2708 	if (err < 0)
2709 		goto err_reg_all;
2710 
2711 	err = __devinet_sysctl_register(net, "default",
2712 					NETCONFA_IFINDEX_DEFAULT, dflt);
2713 	if (err < 0)
2714 		goto err_reg_dflt;
2715 
2716 	err = -ENOMEM;
2717 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2718 	if (!forw_hdr)
2719 		goto err_reg_ctl;
2720 	net->ipv4.forw_hdr = forw_hdr;
2721 #endif
2722 
2723 	net->ipv4.devconf_all = all;
2724 	net->ipv4.devconf_dflt = dflt;
2725 	return 0;
2726 
2727 #ifdef CONFIG_SYSCTL
2728 err_reg_ctl:
2729 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2730 err_reg_dflt:
2731 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2732 err_reg_all:
2733 	kfree(tbl);
2734 err_alloc_ctl:
2735 #endif
2736 	kfree(dflt);
2737 err_alloc_dflt:
2738 	kfree(all);
2739 err_alloc_all:
2740 	return err;
2741 }
2742 
2743 static __net_exit void devinet_exit_net(struct net *net)
2744 {
2745 #ifdef CONFIG_SYSCTL
2746 	struct ctl_table *tbl;
2747 
2748 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2749 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2750 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2751 				    NETCONFA_IFINDEX_DEFAULT);
2752 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2753 				    NETCONFA_IFINDEX_ALL);
2754 	kfree(tbl);
2755 #endif
2756 	kfree(net->ipv4.devconf_dflt);
2757 	kfree(net->ipv4.devconf_all);
2758 }
2759 
2760 static __net_initdata struct pernet_operations devinet_ops = {
2761 	.init = devinet_init_net,
2762 	.exit = devinet_exit_net,
2763 };
2764 
2765 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2766 	.family		  = AF_INET,
2767 	.fill_link_af	  = inet_fill_link_af,
2768 	.get_link_af_size = inet_get_link_af_size,
2769 	.validate_link_af = inet_validate_link_af,
2770 	.set_link_af	  = inet_set_link_af,
2771 };
2772 
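/*
 * Subsystem init: set up the inet_addr_lst hash, register the pernet
 * operations and netdevice notifier, kick off the address-lifetime
 * worker on the power-efficient workqueue, and register the AF_INET
 * link-af ops plus the RTM_{NEW,DEL,GET}ADDR and RTM_GETNETCONF
 * rtnetlink handlers.
 */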
2773 void __init devinet_init(void)
2774 {
2775 	int i;
2776 
2777 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2778 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2779 
2780 	register_pernet_subsys(&devinet_ops);
2781 	register_netdevice_notifier(&ip_netdev_notifier);
2782 
2783 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2784 
2785 	rtnl_af_register(&inet_af_ops);
2786 
2787 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2788 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2789 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2790 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2791 		      inet_netconf_dump_devconf, 0);
2792 }
2793