xref: /openbmc/linux/net/ipv4/devinet.c (revision 89df62c3)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that are only meaningful for IPv6.  They are masked out
 * of an IPv4 address' flags before it is inserted.
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_TENTATIVE | IFA_F_DADFAILED | IFA_F_OPTIMISTIC | \
		 IFA_F_NODAD | IFA_F_HOMEADDRESS | \
		 IFA_F_STABLE_PRIVACY | IFA_F_MANAGETEMPADDR)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/* Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesised so that an expression argument (for example
 * "p + 1") is dereferenced as a whole.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
352 static void __inet_del_ifa(struct in_device *in_dev,
353 			   struct in_ifaddr __rcu **ifap,
354 			   int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356 	struct in_ifaddr *promote = NULL;
357 	struct in_ifaddr *ifa, *ifa1;
358 	struct in_ifaddr *last_prim;
359 	struct in_ifaddr *prev_prom = NULL;
360 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361 
362 	ASSERT_RTNL();
363 
364 	ifa1 = rtnl_dereference(*ifap);
365 	last_prim = rtnl_dereference(in_dev->ifa_list);
366 	if (in_dev->dead)
367 		goto no_promotions;
368 
369 	/* 1. Deleting primary ifaddr forces deletion all secondaries
370 	 * unless alias promotion is set
371 	 **/
372 
373 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 
376 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 			    ifa1->ifa_scope <= ifa->ifa_scope)
379 				last_prim = ifa;
380 
381 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 			    ifa1->ifa_mask != ifa->ifa_mask ||
383 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 				ifap1 = &ifa->ifa_next;
385 				prev_prom = ifa;
386 				continue;
387 			}
388 
389 			if (!do_promote) {
390 				inet_hash_remove(ifa);
391 				*ifap1 = ifa->ifa_next;
392 
393 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 				blocking_notifier_call_chain(&inetaddr_chain,
395 						NETDEV_DOWN, ifa);
396 				inet_free_ifa(ifa);
397 			} else {
398 				promote = ifa;
399 				break;
400 			}
401 		}
402 	}
403 
404 	/* On promotion all secondaries from subnet are changing
405 	 * the primary IP, we must remove all their routes silently
406 	 * and later to add them back with new prefsrc. Do this
407 	 * while all addresses are on the device list.
408 	 */
409 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 		if (ifa1->ifa_mask == ifa->ifa_mask &&
411 		    inet_ifa_match(ifa1->ifa_address, ifa))
412 			fib_del_ifaddr(ifa, ifa1);
413 	}
414 
415 no_promotions:
416 	/* 2. Unlink it */
417 
418 	*ifap = ifa1->ifa_next;
419 	inet_hash_remove(ifa1);
420 
421 	/* 3. Announce address deletion */
422 
423 	/* Send message first, then call notifier.
424 	   At first sight, FIB update triggered by notifier
425 	   will refer to already deleted ifaddr, that could confuse
426 	   netlink listeners. It is not true: look, gated sees
427 	   that route deleted and if it still thinks that ifaddr
428 	   is valid, it will try to restore deleted routes... Grr.
429 	   So that, this order is correct.
430 	 */
431 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433 
434 	if (promote) {
435 		struct in_ifaddr *next_sec;
436 
437 		next_sec = rtnl_dereference(promote->ifa_next);
438 		if (prev_prom) {
439 			struct in_ifaddr *last_sec;
440 
441 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 
443 			last_sec = rtnl_dereference(last_prim->ifa_next);
444 			rcu_assign_pointer(promote->ifa_next, last_sec);
445 			rcu_assign_pointer(last_prim->ifa_next, promote);
446 		}
447 
448 		promote->ifa_flags &= ~IFA_F_SECONDARY;
449 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 		blocking_notifier_call_chain(&inetaddr_chain,
451 				NETDEV_UP, promote);
452 		for (ifa = next_sec; ifa;
453 		     ifa = rtnl_dereference(ifa->ifa_next)) {
454 			if (ifa1->ifa_mask != ifa->ifa_mask ||
455 			    !inet_ifa_match(ifa1->ifa_address, ifa))
456 					continue;
457 			fib_add_ifaddr(ifa);
458 		}
459 
460 	}
461 	if (destroy)
462 		inet_free_ifa(ifa1);
463 }
464 
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
/* Delayed work item that runs check_lifetime(); it is (re)queued whenever
 * an address is inserted or its lifetime changes.
 */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6 only flags to IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				inet_free_ifa(ifa);
513 				return -EINVAL;
514 			}
515 			ifa->ifa_flags |= IFA_F_SECONDARY;
516 		}
517 
518 		ifap = &ifa1->ifa_next;
519 		ifa1 = rtnl_dereference(*ifap);
520 	}
521 
522 	/* Allow any devices that wish to register ifaddr validtors to weigh
523 	 * in now, before changes are committed.  The rntl lock is serializing
524 	 * access here, so the state should not change between a validator call
525 	 * and a final notify on commit.  This isn't invoked on promotion under
526 	 * the assumption that validators are checking the address itself, and
527 	 * not the flags.
528 	 */
529 	ivi.ivi_addr = ifa->ifa_address;
530 	ivi.ivi_dev = ifa->ifa_dev;
531 	ivi.extack = extack;
532 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
533 					   NETDEV_UP, &ivi);
534 	ret = notifier_to_errno(ret);
535 	if (ret) {
536 		inet_free_ifa(ifa);
537 		return ret;
538 	}
539 
540 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
541 		ifap = last_primary;
542 
543 	rcu_assign_pointer(ifa->ifa_next, *ifap);
544 	rcu_assign_pointer(*ifap, ifa);
545 
546 	inet_hash_insert(dev_net(in_dev->dev), ifa);
547 
548 	cancel_delayed_work(&check_lifetime_work);
549 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
550 
551 	/* Send message first, then call notifier.
552 	   Notifier will trigger FIB update, so that
553 	   listeners of netlink will know about new ifaddr */
554 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
555 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
556 
557 	return 0;
558 }
559 
560 static int inet_insert_ifa(struct in_ifaddr *ifa)
561 {
562 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
563 }
564 
565 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
566 {
567 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
568 
569 	ASSERT_RTNL();
570 
571 	if (!in_dev) {
572 		inet_free_ifa(ifa);
573 		return -ENOBUFS;
574 	}
575 	ipv4_devconf_setall(in_dev);
576 	neigh_parms_data_state_setall(in_dev->arp_parms);
577 	if (ifa->ifa_dev != in_dev) {
578 		WARN_ON(ifa->ifa_dev);
579 		in_dev_hold(in_dev);
580 		ifa->ifa_dev = in_dev;
581 	}
582 	if (ipv4_is_loopback(ifa->ifa_local))
583 		ifa->ifa_scope = RT_SCOPE_HOST;
584 	return inet_insert_ifa(ifa);
585 }
586 
587 /* Caller must hold RCU or RTNL :
588  * We dont take a reference on found in_device
589  */
590 struct in_device *inetdev_by_index(struct net *net, int ifindex)
591 {
592 	struct net_device *dev;
593 	struct in_device *in_dev = NULL;
594 
595 	rcu_read_lock();
596 	dev = dev_get_by_index_rcu(net, ifindex);
597 	if (dev)
598 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
599 	rcu_read_unlock();
600 	return in_dev;
601 }
602 EXPORT_SYMBOL(inetdev_by_index);
603 
604 /* Called only from RTNL semaphored context. No locks. */
605 
606 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
607 				    __be32 mask)
608 {
609 	struct in_ifaddr *ifa;
610 
611 	ASSERT_RTNL();
612 
613 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
614 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
615 			return ifa;
616 	}
617 	return NULL;
618 }
619 
620 static int ip_mc_autojoin_config(struct net *net, bool join,
621 				 const struct in_ifaddr *ifa)
622 {
623 #if defined(CONFIG_IP_MULTICAST)
624 	struct ip_mreqn mreq = {
625 		.imr_multiaddr.s_addr = ifa->ifa_address,
626 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
627 	};
628 	struct sock *sk = net->ipv4.mc_autojoin_sk;
629 	int ret;
630 
631 	ASSERT_RTNL();
632 
633 	lock_sock(sk);
634 	if (join)
635 		ret = ip_mc_join_group(sk, &mreq);
636 	else
637 		ret = ip_mc_leave_group(sk, &mreq);
638 	release_sock(sk);
639 
640 	return ret;
641 #else
642 	return -EOPNOTSUPP;
643 #endif
644 }
645 
646 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
647 			    struct netlink_ext_ack *extack)
648 {
649 	struct net *net = sock_net(skb->sk);
650 	struct in_ifaddr __rcu **ifap;
651 	struct nlattr *tb[IFA_MAX+1];
652 	struct in_device *in_dev;
653 	struct ifaddrmsg *ifm;
654 	struct in_ifaddr *ifa;
655 	int err;
656 
657 	ASSERT_RTNL();
658 
659 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
660 				     ifa_ipv4_policy, extack);
661 	if (err < 0)
662 		goto errout;
663 
664 	ifm = nlmsg_data(nlh);
665 	in_dev = inetdev_by_index(net, ifm->ifa_index);
666 	if (!in_dev) {
667 		err = -ENODEV;
668 		goto errout;
669 	}
670 
671 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
672 	     ifap = &ifa->ifa_next) {
673 		if (tb[IFA_LOCAL] &&
674 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
675 			continue;
676 
677 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
678 			continue;
679 
680 		if (tb[IFA_ADDRESS] &&
681 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
682 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
683 			continue;
684 
685 		if (ipv4_is_multicast(ifa->ifa_address))
686 			ip_mc_autojoin_config(net, false, ifa);
687 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
688 		return 0;
689 	}
690 
691 	err = -EADDRNOTAVAIL;
692 errout:
693 	return err;
694 }
695 
696 #define INFINITY_LIFE_TIME	0xFFFFFFFF
697 
698 static void check_lifetime(struct work_struct *work)
699 {
700 	unsigned long now, next, next_sec, next_sched;
701 	struct in_ifaddr *ifa;
702 	struct hlist_node *n;
703 	int i;
704 
705 	now = jiffies;
706 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
707 
708 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
709 		bool change_needed = false;
710 
711 		rcu_read_lock();
712 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
713 			unsigned long age;
714 
715 			if (ifa->ifa_flags & IFA_F_PERMANENT)
716 				continue;
717 
718 			/* We try to batch several events at once. */
719 			age = (now - ifa->ifa_tstamp +
720 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721 
722 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723 			    age >= ifa->ifa_valid_lft) {
724 				change_needed = true;
725 			} else if (ifa->ifa_preferred_lft ==
726 				   INFINITY_LIFE_TIME) {
727 				continue;
728 			} else if (age >= ifa->ifa_preferred_lft) {
729 				if (time_before(ifa->ifa_tstamp +
730 						ifa->ifa_valid_lft * HZ, next))
731 					next = ifa->ifa_tstamp +
732 					       ifa->ifa_valid_lft * HZ;
733 
734 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
735 					change_needed = true;
736 			} else if (time_before(ifa->ifa_tstamp +
737 					       ifa->ifa_preferred_lft * HZ,
738 					       next)) {
739 				next = ifa->ifa_tstamp +
740 				       ifa->ifa_preferred_lft * HZ;
741 			}
742 		}
743 		rcu_read_unlock();
744 		if (!change_needed)
745 			continue;
746 		rtnl_lock();
747 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
748 			unsigned long age;
749 
750 			if (ifa->ifa_flags & IFA_F_PERMANENT)
751 				continue;
752 
753 			/* We try to batch several events at once. */
754 			age = (now - ifa->ifa_tstamp +
755 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
756 
757 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
758 			    age >= ifa->ifa_valid_lft) {
759 				struct in_ifaddr __rcu **ifap;
760 				struct in_ifaddr *tmp;
761 
762 				ifap = &ifa->ifa_dev->ifa_list;
763 				tmp = rtnl_dereference(*ifap);
764 				while (tmp) {
765 					if (tmp == ifa) {
766 						inet_del_ifa(ifa->ifa_dev,
767 							     ifap, 1);
768 						break;
769 					}
770 					ifap = &tmp->ifa_next;
771 					tmp = rtnl_dereference(*ifap);
772 				}
773 			} else if (ifa->ifa_preferred_lft !=
774 				   INFINITY_LIFE_TIME &&
775 				   age >= ifa->ifa_preferred_lft &&
776 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
777 				ifa->ifa_flags |= IFA_F_DEPRECATED;
778 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
779 			}
780 		}
781 		rtnl_unlock();
782 	}
783 
784 	next_sec = round_jiffies_up(next);
785 	next_sched = next;
786 
787 	/* If rounded timeout is accurate enough, accept it. */
788 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
789 		next_sched = next_sec;
790 
791 	now = jiffies;
792 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
793 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
794 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
795 
796 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
797 			next_sched - now);
798 }
799 
800 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
801 			     __u32 prefered_lft)
802 {
803 	unsigned long timeout;
804 
805 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
806 
807 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
808 	if (addrconf_finite_timeout(timeout))
809 		ifa->ifa_valid_lft = timeout;
810 	else
811 		ifa->ifa_flags |= IFA_F_PERMANENT;
812 
813 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
814 	if (addrconf_finite_timeout(timeout)) {
815 		if (timeout == 0)
816 			ifa->ifa_flags |= IFA_F_DEPRECATED;
817 		ifa->ifa_preferred_lft = timeout;
818 	}
819 	ifa->ifa_tstamp = jiffies;
820 	if (!ifa->ifa_cstamp)
821 		ifa->ifa_cstamp = ifa->ifa_tstamp;
822 }
823 
824 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
825 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
826 				       struct netlink_ext_ack *extack)
827 {
828 	struct nlattr *tb[IFA_MAX+1];
829 	struct in_ifaddr *ifa;
830 	struct ifaddrmsg *ifm;
831 	struct net_device *dev;
832 	struct in_device *in_dev;
833 	int err;
834 
835 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
836 				     ifa_ipv4_policy, extack);
837 	if (err < 0)
838 		goto errout;
839 
840 	ifm = nlmsg_data(nlh);
841 	err = -EINVAL;
842 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
843 		goto errout;
844 
845 	dev = __dev_get_by_index(net, ifm->ifa_index);
846 	err = -ENODEV;
847 	if (!dev)
848 		goto errout;
849 
850 	in_dev = __in_dev_get_rtnl(dev);
851 	err = -ENOBUFS;
852 	if (!in_dev)
853 		goto errout;
854 
855 	ifa = inet_alloc_ifa();
856 	if (!ifa)
857 		/*
858 		 * A potential indev allocation can be left alive, it stays
859 		 * assigned to its device and is destroy with it.
860 		 */
861 		goto errout;
862 
863 	ipv4_devconf_setall(in_dev);
864 	neigh_parms_data_state_setall(in_dev->arp_parms);
865 	in_dev_hold(in_dev);
866 
867 	if (!tb[IFA_ADDRESS])
868 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
869 
870 	INIT_HLIST_NODE(&ifa->hash);
871 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
872 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
873 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
874 					 ifm->ifa_flags;
875 	ifa->ifa_scope = ifm->ifa_scope;
876 	ifa->ifa_dev = in_dev;
877 
878 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
879 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
880 
881 	if (tb[IFA_BROADCAST])
882 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
883 
884 	if (tb[IFA_LABEL])
885 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
886 	else
887 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
888 
889 	if (tb[IFA_RT_PRIORITY])
890 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
891 
892 	if (tb[IFA_PROTO])
893 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
894 
895 	if (tb[IFA_CACHEINFO]) {
896 		struct ifa_cacheinfo *ci;
897 
898 		ci = nla_data(tb[IFA_CACHEINFO]);
899 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
900 			err = -EINVAL;
901 			goto errout_free;
902 		}
903 		*pvalid_lft = ci->ifa_valid;
904 		*pprefered_lft = ci->ifa_prefered;
905 	}
906 
907 	return ifa;
908 
909 errout_free:
910 	inet_free_ifa(ifa);
911 errout:
912 	return ERR_PTR(err);
913 }
914 
915 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
916 {
917 	struct in_device *in_dev = ifa->ifa_dev;
918 	struct in_ifaddr *ifa1;
919 
920 	if (!ifa->ifa_local)
921 		return NULL;
922 
923 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
924 		if (ifa1->ifa_mask == ifa->ifa_mask &&
925 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
926 		    ifa1->ifa_local == ifa->ifa_local)
927 			return ifa1;
928 	}
929 	return NULL;
930 }
931 
932 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
933 			    struct netlink_ext_ack *extack)
934 {
935 	struct net *net = sock_net(skb->sk);
936 	struct in_ifaddr *ifa;
937 	struct in_ifaddr *ifa_existing;
938 	__u32 valid_lft = INFINITY_LIFE_TIME;
939 	__u32 prefered_lft = INFINITY_LIFE_TIME;
940 
941 	ASSERT_RTNL();
942 
943 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
944 	if (IS_ERR(ifa))
945 		return PTR_ERR(ifa);
946 
947 	ifa_existing = find_matching_ifa(ifa);
948 	if (!ifa_existing) {
949 		/* It would be best to check for !NLM_F_CREATE here but
950 		 * userspace already relies on not having to provide this.
951 		 */
952 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
953 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
954 			int ret = ip_mc_autojoin_config(net, true, ifa);
955 
956 			if (ret < 0) {
957 				inet_free_ifa(ifa);
958 				return ret;
959 			}
960 		}
961 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
962 					 extack);
963 	} else {
964 		u32 new_metric = ifa->ifa_rt_priority;
965 		u8 new_proto = ifa->ifa_proto;
966 
967 		inet_free_ifa(ifa);
968 
969 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
970 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
971 			return -EEXIST;
972 		ifa = ifa_existing;
973 
974 		if (ifa->ifa_rt_priority != new_metric) {
975 			fib_modify_prefix_metric(ifa, new_metric);
976 			ifa->ifa_rt_priority = new_metric;
977 		}
978 
979 		ifa->ifa_proto = new_proto;
980 
981 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
982 		cancel_delayed_work(&check_lifetime_work);
983 		queue_delayed_work(system_power_efficient_wq,
984 				&check_lifetime_work, 0);
985 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
986 	}
987 	return 0;
988 }
989 
990 /*
991  *	Determine a default network mask, based on the IP address.
992  */
993 
994 static int inet_abc_len(__be32 addr)
995 {
996 	int rc = -1;	/* Something else, probably a multicast. */
997 
998 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
999 		rc = 0;
1000 	else {
1001 		__u32 haddr = ntohl(addr);
1002 		if (IN_CLASSA(haddr))
1003 			rc = 8;
1004 		else if (IN_CLASSB(haddr))
1005 			rc = 16;
1006 		else if (IN_CLASSC(haddr))
1007 			rc = 24;
1008 		else if (IN_CLASSE(haddr))
1009 			rc = 32;
1010 	}
1011 
1012 	return rc;
1013 }
1014 
1015 
1016 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1017 {
1018 	struct sockaddr_in sin_orig;
1019 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1020 	struct in_ifaddr __rcu **ifap = NULL;
1021 	struct in_device *in_dev;
1022 	struct in_ifaddr *ifa = NULL;
1023 	struct net_device *dev;
1024 	char *colon;
1025 	int ret = -EFAULT;
1026 	int tryaddrmatch = 0;
1027 
1028 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1029 
1030 	/* save original address for comparison */
1031 	memcpy(&sin_orig, sin, sizeof(*sin));
1032 
1033 	colon = strchr(ifr->ifr_name, ':');
1034 	if (colon)
1035 		*colon = 0;
1036 
1037 	dev_load(net, ifr->ifr_name);
1038 
1039 	switch (cmd) {
1040 	case SIOCGIFADDR:	/* Get interface address */
1041 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1042 	case SIOCGIFDSTADDR:	/* Get the destination address */
1043 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1044 		/* Note that these ioctls will not sleep,
1045 		   so that we do not impose a lock.
1046 		   One day we will be forced to put shlock here (I mean SMP)
1047 		 */
1048 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1049 		memset(sin, 0, sizeof(*sin));
1050 		sin->sin_family = AF_INET;
1051 		break;
1052 
1053 	case SIOCSIFFLAGS:
1054 		ret = -EPERM;
1055 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1056 			goto out;
1057 		break;
1058 	case SIOCSIFADDR:	/* Set interface address (and family) */
1059 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1060 	case SIOCSIFDSTADDR:	/* Set the destination address */
1061 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1062 		ret = -EPERM;
1063 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1064 			goto out;
1065 		ret = -EINVAL;
1066 		if (sin->sin_family != AF_INET)
1067 			goto out;
1068 		break;
1069 	default:
1070 		ret = -EINVAL;
1071 		goto out;
1072 	}
1073 
1074 	rtnl_lock();
1075 
1076 	ret = -ENODEV;
1077 	dev = __dev_get_by_name(net, ifr->ifr_name);
1078 	if (!dev)
1079 		goto done;
1080 
1081 	if (colon)
1082 		*colon = ':';
1083 
1084 	in_dev = __in_dev_get_rtnl(dev);
1085 	if (in_dev) {
1086 		if (tryaddrmatch) {
1087 			/* Matthias Andree */
1088 			/* compare label and address (4.4BSD style) */
1089 			/* note: we only do this for a limited set of ioctls
1090 			   and only if the original address family was AF_INET.
1091 			   This is checked above. */
1092 
1093 			for (ifap = &in_dev->ifa_list;
1094 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1095 			     ifap = &ifa->ifa_next) {
1096 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1097 				    sin_orig.sin_addr.s_addr ==
1098 							ifa->ifa_local) {
1099 					break; /* found */
1100 				}
1101 			}
1102 		}
1103 		/* we didn't get a match, maybe the application is
1104 		   4.3BSD-style and passed in junk so we fall back to
1105 		   comparing just the label */
1106 		if (!ifa) {
1107 			for (ifap = &in_dev->ifa_list;
1108 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1109 			     ifap = &ifa->ifa_next)
1110 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1111 					break;
1112 		}
1113 	}
1114 
1115 	ret = -EADDRNOTAVAIL;
1116 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1117 		goto done;
1118 
1119 	switch (cmd) {
1120 	case SIOCGIFADDR:	/* Get interface address */
1121 		ret = 0;
1122 		sin->sin_addr.s_addr = ifa->ifa_local;
1123 		break;
1124 
1125 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1126 		ret = 0;
1127 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1128 		break;
1129 
1130 	case SIOCGIFDSTADDR:	/* Get the destination address */
1131 		ret = 0;
1132 		sin->sin_addr.s_addr = ifa->ifa_address;
1133 		break;
1134 
1135 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1136 		ret = 0;
1137 		sin->sin_addr.s_addr = ifa->ifa_mask;
1138 		break;
1139 
1140 	case SIOCSIFFLAGS:
1141 		if (colon) {
1142 			ret = -EADDRNOTAVAIL;
1143 			if (!ifa)
1144 				break;
1145 			ret = 0;
1146 			if (!(ifr->ifr_flags & IFF_UP))
1147 				inet_del_ifa(in_dev, ifap, 1);
1148 			break;
1149 		}
1150 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1151 		break;
1152 
1153 	case SIOCSIFADDR:	/* Set interface address (and family) */
1154 		ret = -EINVAL;
1155 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1156 			break;
1157 
1158 		if (!ifa) {
1159 			ret = -ENOBUFS;
1160 			ifa = inet_alloc_ifa();
1161 			if (!ifa)
1162 				break;
1163 			INIT_HLIST_NODE(&ifa->hash);
1164 			if (colon)
1165 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1166 			else
1167 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1168 		} else {
1169 			ret = 0;
1170 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1171 				break;
1172 			inet_del_ifa(in_dev, ifap, 0);
1173 			ifa->ifa_broadcast = 0;
1174 			ifa->ifa_scope = 0;
1175 		}
1176 
1177 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1178 
1179 		if (!(dev->flags & IFF_POINTOPOINT)) {
1180 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1181 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1182 			if ((dev->flags & IFF_BROADCAST) &&
1183 			    ifa->ifa_prefixlen < 31)
1184 				ifa->ifa_broadcast = ifa->ifa_address |
1185 						     ~ifa->ifa_mask;
1186 		} else {
1187 			ifa->ifa_prefixlen = 32;
1188 			ifa->ifa_mask = inet_make_mask(32);
1189 		}
1190 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1191 		ret = inet_set_ifa(dev, ifa);
1192 		break;
1193 
1194 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1195 		ret = 0;
1196 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1197 			inet_del_ifa(in_dev, ifap, 0);
1198 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1199 			inet_insert_ifa(ifa);
1200 		}
1201 		break;
1202 
1203 	case SIOCSIFDSTADDR:	/* Set the destination address */
1204 		ret = 0;
1205 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1206 			break;
1207 		ret = -EINVAL;
1208 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1209 			break;
1210 		ret = 0;
1211 		inet_del_ifa(in_dev, ifap, 0);
1212 		ifa->ifa_address = sin->sin_addr.s_addr;
1213 		inet_insert_ifa(ifa);
1214 		break;
1215 
1216 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1217 
1218 		/*
1219 		 *	The mask we set must be legal.
1220 		 */
1221 		ret = -EINVAL;
1222 		if (bad_mask(sin->sin_addr.s_addr, 0))
1223 			break;
1224 		ret = 0;
1225 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1226 			__be32 old_mask = ifa->ifa_mask;
1227 			inet_del_ifa(in_dev, ifap, 0);
1228 			ifa->ifa_mask = sin->sin_addr.s_addr;
1229 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1230 
1231 			/* See if current broadcast address matches
1232 			 * with current netmask, then recalculate
1233 			 * the broadcast address. Otherwise it's a
1234 			 * funny address, so don't touch it since
1235 			 * the user seems to know what (s)he's doing...
1236 			 */
1237 			if ((dev->flags & IFF_BROADCAST) &&
1238 			    (ifa->ifa_prefixlen < 31) &&
1239 			    (ifa->ifa_broadcast ==
1240 			     (ifa->ifa_local|~old_mask))) {
1241 				ifa->ifa_broadcast = (ifa->ifa_local |
1242 						      ~sin->sin_addr.s_addr);
1243 			}
1244 			inet_insert_ifa(ifa);
1245 		}
1246 		break;
1247 	}
1248 done:
1249 	rtnl_unlock();
1250 out:
1251 	return ret;
1252 }
1253 
1254 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1255 {
1256 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1257 	const struct in_ifaddr *ifa;
1258 	struct ifreq ifr;
1259 	int done = 0;
1260 
1261 	if (WARN_ON(size > sizeof(struct ifreq)))
1262 		goto out;
1263 
1264 	if (!in_dev)
1265 		goto out;
1266 
1267 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1268 		if (!buf) {
1269 			done += size;
1270 			continue;
1271 		}
1272 		if (len < size)
1273 			break;
1274 		memset(&ifr, 0, sizeof(struct ifreq));
1275 		strcpy(ifr.ifr_name, ifa->ifa_label);
1276 
1277 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1278 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1279 								ifa->ifa_local;
1280 
1281 		if (copy_to_user(buf + done, &ifr, size)) {
1282 			done = -EFAULT;
1283 			break;
1284 		}
1285 		len  -= size;
1286 		done += size;
1287 	}
1288 out:
1289 	return done;
1290 }
1291 
1292 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1293 				 int scope)
1294 {
1295 	const struct in_ifaddr *ifa;
1296 
1297 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1298 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1299 			continue;
1300 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1301 		    ifa->ifa_scope <= scope)
1302 			return ifa->ifa_local;
1303 	}
1304 
1305 	return 0;
1306 }
1307 
1308 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1309 {
1310 	const struct in_ifaddr *ifa;
1311 	__be32 addr = 0;
1312 	unsigned char localnet_scope = RT_SCOPE_HOST;
1313 	struct in_device *in_dev;
1314 	struct net *net = dev_net(dev);
1315 	int master_idx;
1316 
1317 	rcu_read_lock();
1318 	in_dev = __in_dev_get_rcu(dev);
1319 	if (!in_dev)
1320 		goto no_in_dev;
1321 
1322 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1323 		localnet_scope = RT_SCOPE_LINK;
1324 
1325 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1326 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1327 			continue;
1328 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1329 			continue;
1330 		if (!dst || inet_ifa_match(dst, ifa)) {
1331 			addr = ifa->ifa_local;
1332 			break;
1333 		}
1334 		if (!addr)
1335 			addr = ifa->ifa_local;
1336 	}
1337 
1338 	if (addr)
1339 		goto out_unlock;
1340 no_in_dev:
1341 	master_idx = l3mdev_master_ifindex_rcu(dev);
1342 
1343 	/* For VRFs, the VRF device takes the place of the loopback device,
1344 	 * with addresses on it being preferred.  Note in such cases the
1345 	 * loopback device will be among the devices that fail the master_idx
1346 	 * equality check in the loop below.
1347 	 */
1348 	if (master_idx &&
1349 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1350 	    (in_dev = __in_dev_get_rcu(dev))) {
1351 		addr = in_dev_select_addr(in_dev, scope);
1352 		if (addr)
1353 			goto out_unlock;
1354 	}
1355 
1356 	/* Not loopback addresses on loopback should be preferred
1357 	   in this case. It is important that lo is the first interface
1358 	   in dev_base list.
1359 	 */
1360 	for_each_netdev_rcu(net, dev) {
1361 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1362 			continue;
1363 
1364 		in_dev = __in_dev_get_rcu(dev);
1365 		if (!in_dev)
1366 			continue;
1367 
1368 		addr = in_dev_select_addr(in_dev, scope);
1369 		if (addr)
1370 			goto out_unlock;
1371 	}
1372 out_unlock:
1373 	rcu_read_unlock();
1374 	return addr;
1375 }
1376 EXPORT_SYMBOL(inet_select_addr);
1377 
1378 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1379 			      __be32 local, int scope)
1380 {
1381 	unsigned char localnet_scope = RT_SCOPE_HOST;
1382 	const struct in_ifaddr *ifa;
1383 	__be32 addr = 0;
1384 	int same = 0;
1385 
1386 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1387 		localnet_scope = RT_SCOPE_LINK;
1388 
1389 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1390 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1391 
1392 		if (!addr &&
1393 		    (local == ifa->ifa_local || !local) &&
1394 		    min_scope <= scope) {
1395 			addr = ifa->ifa_local;
1396 			if (same)
1397 				break;
1398 		}
1399 		if (!same) {
1400 			same = (!local || inet_ifa_match(local, ifa)) &&
1401 				(!dst || inet_ifa_match(dst, ifa));
1402 			if (same && addr) {
1403 				if (local || !dst)
1404 					break;
1405 				/* Is the selected addr into dst subnet? */
1406 				if (inet_ifa_match(addr, ifa))
1407 					break;
1408 				/* No, then can we use new local src? */
1409 				if (min_scope <= scope) {
1410 					addr = ifa->ifa_local;
1411 					break;
1412 				}
1413 				/* search for large dst subnet for addr */
1414 				same = 0;
1415 			}
1416 		}
1417 	}
1418 
1419 	return same ? addr : 0;
1420 }
1421 
1422 /*
1423  * Confirm that local IP address exists using wildcards:
1424  * - net: netns to check, cannot be NULL
1425  * - in_dev: only on this interface, NULL=any interface
1426  * - dst: only in the same subnet as dst, 0=any dst
1427  * - local: address, 0=autoselect the local address
1428  * - scope: maximum allowed scope value for the local address
1429  */
1430 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1431 			 __be32 dst, __be32 local, int scope)
1432 {
1433 	__be32 addr = 0;
1434 	struct net_device *dev;
1435 
1436 	if (in_dev)
1437 		return confirm_addr_indev(in_dev, dst, local, scope);
1438 
1439 	rcu_read_lock();
1440 	for_each_netdev_rcu(net, dev) {
1441 		in_dev = __in_dev_get_rcu(dev);
1442 		if (in_dev) {
1443 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1444 			if (addr)
1445 				break;
1446 		}
1447 	}
1448 	rcu_read_unlock();
1449 
1450 	return addr;
1451 }
1452 EXPORT_SYMBOL(inet_confirm_addr);
1453 
1454 /*
1455  *	Device notifier
1456  */
1457 
1458 int register_inetaddr_notifier(struct notifier_block *nb)
1459 {
1460 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1461 }
1462 EXPORT_SYMBOL(register_inetaddr_notifier);
1463 
1464 int unregister_inetaddr_notifier(struct notifier_block *nb)
1465 {
1466 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1467 }
1468 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1469 
1470 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1471 {
1472 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1473 }
1474 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1475 
1476 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1477 {
1478 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1479 	    nb);
1480 }
1481 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1482 
1483 /* Rename ifa_labels for a device name change. Make some effort to preserve
1484  * existing alias numbering and to create unique labels if possible.
1485 */
1486 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1487 {
1488 	struct in_ifaddr *ifa;
1489 	int named = 0;
1490 
1491 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1492 		char old[IFNAMSIZ], *dot;
1493 
1494 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1495 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1496 		if (named++ == 0)
1497 			goto skip;
1498 		dot = strchr(old, ':');
1499 		if (!dot) {
1500 			sprintf(old, ":%d", named);
1501 			dot = old;
1502 		}
1503 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1504 			strcat(ifa->ifa_label, dot);
1505 		else
1506 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1507 skip:
1508 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1509 	}
1510 }
1511 
1512 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1513 					struct in_device *in_dev)
1514 
1515 {
1516 	const struct in_ifaddr *ifa;
1517 
1518 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1519 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1520 			 ifa->ifa_local, dev,
1521 			 ifa->ifa_local, NULL,
1522 			 dev->dev_addr, NULL);
1523 	}
1524 }
1525 
1526 /* Called only under RTNL semaphore */
1527 
1528 static int inetdev_event(struct notifier_block *this, unsigned long event,
1529 			 void *ptr)
1530 {
1531 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1532 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1533 
1534 	ASSERT_RTNL();
1535 
1536 	if (!in_dev) {
1537 		if (event == NETDEV_REGISTER) {
1538 			in_dev = inetdev_init(dev);
1539 			if (IS_ERR(in_dev))
1540 				return notifier_from_errno(PTR_ERR(in_dev));
1541 			if (dev->flags & IFF_LOOPBACK) {
1542 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1543 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1544 			}
1545 		} else if (event == NETDEV_CHANGEMTU) {
1546 			/* Re-enabling IP */
1547 			if (inetdev_valid_mtu(dev->mtu))
1548 				in_dev = inetdev_init(dev);
1549 		}
1550 		goto out;
1551 	}
1552 
1553 	switch (event) {
1554 	case NETDEV_REGISTER:
1555 		pr_debug("%s: bug\n", __func__);
1556 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1557 		break;
1558 	case NETDEV_UP:
1559 		if (!inetdev_valid_mtu(dev->mtu))
1560 			break;
1561 		if (dev->flags & IFF_LOOPBACK) {
1562 			struct in_ifaddr *ifa = inet_alloc_ifa();
1563 
1564 			if (ifa) {
1565 				INIT_HLIST_NODE(&ifa->hash);
1566 				ifa->ifa_local =
1567 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1568 				ifa->ifa_prefixlen = 8;
1569 				ifa->ifa_mask = inet_make_mask(8);
1570 				in_dev_hold(in_dev);
1571 				ifa->ifa_dev = in_dev;
1572 				ifa->ifa_scope = RT_SCOPE_HOST;
1573 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1574 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1575 						 INFINITY_LIFE_TIME);
1576 				ipv4_devconf_setall(in_dev);
1577 				neigh_parms_data_state_setall(in_dev->arp_parms);
1578 				inet_insert_ifa(ifa);
1579 			}
1580 		}
1581 		ip_mc_up(in_dev);
1582 		fallthrough;
1583 	case NETDEV_CHANGEADDR:
1584 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1585 			break;
1586 		fallthrough;
1587 	case NETDEV_NOTIFY_PEERS:
1588 		/* Send gratuitous ARP to notify of link change */
1589 		inetdev_send_gratuitous_arp(dev, in_dev);
1590 		break;
1591 	case NETDEV_DOWN:
1592 		ip_mc_down(in_dev);
1593 		break;
1594 	case NETDEV_PRE_TYPE_CHANGE:
1595 		ip_mc_unmap(in_dev);
1596 		break;
1597 	case NETDEV_POST_TYPE_CHANGE:
1598 		ip_mc_remap(in_dev);
1599 		break;
1600 	case NETDEV_CHANGEMTU:
1601 		if (inetdev_valid_mtu(dev->mtu))
1602 			break;
1603 		/* disable IP when MTU is not enough */
1604 		fallthrough;
1605 	case NETDEV_UNREGISTER:
1606 		inetdev_destroy(in_dev);
1607 		break;
1608 	case NETDEV_CHANGENAME:
1609 		/* Do not notify about label change, this event is
1610 		 * not interesting to applications using netlink.
1611 		 */
1612 		inetdev_changename(dev, in_dev);
1613 
1614 		devinet_sysctl_unregister(in_dev);
1615 		devinet_sysctl_register(in_dev);
1616 		break;
1617 	}
1618 out:
1619 	return NOTIFY_DONE;
1620 }
1621 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1625 
1626 static size_t inet_nlmsg_size(void)
1627 {
1628 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1629 	       + nla_total_size(4) /* IFA_ADDRESS */
1630 	       + nla_total_size(4) /* IFA_LOCAL */
1631 	       + nla_total_size(4) /* IFA_BROADCAST */
1632 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1633 	       + nla_total_size(4)  /* IFA_FLAGS */
1634 	       + nla_total_size(1)  /* IFA_PROTO */
1635 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1636 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1637 }
1638 
1639 static inline u32 cstamp_delta(unsigned long cstamp)
1640 {
1641 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1642 }
1643 
1644 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1645 			 unsigned long tstamp, u32 preferred, u32 valid)
1646 {
1647 	struct ifa_cacheinfo ci;
1648 
1649 	ci.cstamp = cstamp_delta(cstamp);
1650 	ci.tstamp = cstamp_delta(tstamp);
1651 	ci.ifa_prefered = preferred;
1652 	ci.ifa_valid = valid;
1653 
1654 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1655 }
1656 
1657 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1658 			    struct inet_fill_args *args)
1659 {
1660 	struct ifaddrmsg *ifm;
1661 	struct nlmsghdr  *nlh;
1662 	u32 preferred, valid;
1663 
1664 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1665 			args->flags);
1666 	if (!nlh)
1667 		return -EMSGSIZE;
1668 
1669 	ifm = nlmsg_data(nlh);
1670 	ifm->ifa_family = AF_INET;
1671 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1672 	ifm->ifa_flags = ifa->ifa_flags;
1673 	ifm->ifa_scope = ifa->ifa_scope;
1674 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1675 
1676 	if (args->netnsid >= 0 &&
1677 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1678 		goto nla_put_failure;
1679 
1680 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1681 		preferred = ifa->ifa_preferred_lft;
1682 		valid = ifa->ifa_valid_lft;
1683 		if (preferred != INFINITY_LIFE_TIME) {
1684 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1685 
1686 			if (preferred > tval)
1687 				preferred -= tval;
1688 			else
1689 				preferred = 0;
1690 			if (valid != INFINITY_LIFE_TIME) {
1691 				if (valid > tval)
1692 					valid -= tval;
1693 				else
1694 					valid = 0;
1695 			}
1696 		}
1697 	} else {
1698 		preferred = INFINITY_LIFE_TIME;
1699 		valid = INFINITY_LIFE_TIME;
1700 	}
1701 	if ((ifa->ifa_address &&
1702 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1703 	    (ifa->ifa_local &&
1704 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1705 	    (ifa->ifa_broadcast &&
1706 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1707 	    (ifa->ifa_label[0] &&
1708 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1709 	    (ifa->ifa_proto &&
1710 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1711 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1712 	    (ifa->ifa_rt_priority &&
1713 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1714 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1715 			  preferred, valid))
1716 		goto nla_put_failure;
1717 
1718 	nlmsg_end(skb, nlh);
1719 	return 0;
1720 
1721 nla_put_failure:
1722 	nlmsg_cancel(skb, nlh);
1723 	return -EMSGSIZE;
1724 }
1725 
1726 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1727 				      struct inet_fill_args *fillargs,
1728 				      struct net **tgt_net, struct sock *sk,
1729 				      struct netlink_callback *cb)
1730 {
1731 	struct netlink_ext_ack *extack = cb->extack;
1732 	struct nlattr *tb[IFA_MAX+1];
1733 	struct ifaddrmsg *ifm;
1734 	int err, i;
1735 
1736 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1737 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1738 		return -EINVAL;
1739 	}
1740 
1741 	ifm = nlmsg_data(nlh);
1742 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1743 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1744 		return -EINVAL;
1745 	}
1746 
1747 	fillargs->ifindex = ifm->ifa_index;
1748 	if (fillargs->ifindex) {
1749 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1750 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1751 	}
1752 
1753 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1754 					    ifa_ipv4_policy, extack);
1755 	if (err < 0)
1756 		return err;
1757 
1758 	for (i = 0; i <= IFA_MAX; ++i) {
1759 		if (!tb[i])
1760 			continue;
1761 
1762 		if (i == IFA_TARGET_NETNSID) {
1763 			struct net *net;
1764 
1765 			fillargs->netnsid = nla_get_s32(tb[i]);
1766 
1767 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1768 			if (IS_ERR(net)) {
1769 				fillargs->netnsid = -1;
1770 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1771 				return PTR_ERR(net);
1772 			}
1773 			*tgt_net = net;
1774 		} else {
1775 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1776 			return -EINVAL;
1777 		}
1778 	}
1779 
1780 	return 0;
1781 }
1782 
1783 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1784 			    struct netlink_callback *cb, int s_ip_idx,
1785 			    struct inet_fill_args *fillargs)
1786 {
1787 	struct in_ifaddr *ifa;
1788 	int ip_idx = 0;
1789 	int err;
1790 
1791 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1792 		if (ip_idx < s_ip_idx) {
1793 			ip_idx++;
1794 			continue;
1795 		}
1796 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1797 		if (err < 0)
1798 			goto done;
1799 
1800 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1801 		ip_idx++;
1802 	}
1803 	err = 0;
1804 
1805 done:
1806 	cb->args[2] = ip_idx;
1807 
1808 	return err;
1809 }
1810 
1811 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1812 {
1813 	const struct nlmsghdr *nlh = cb->nlh;
1814 	struct inet_fill_args fillargs = {
1815 		.portid = NETLINK_CB(cb->skb).portid,
1816 		.seq = nlh->nlmsg_seq,
1817 		.event = RTM_NEWADDR,
1818 		.flags = NLM_F_MULTI,
1819 		.netnsid = -1,
1820 	};
1821 	struct net *net = sock_net(skb->sk);
1822 	struct net *tgt_net = net;
1823 	int h, s_h;
1824 	int idx, s_idx;
1825 	int s_ip_idx;
1826 	struct net_device *dev;
1827 	struct in_device *in_dev;
1828 	struct hlist_head *head;
1829 	int err = 0;
1830 
1831 	s_h = cb->args[0];
1832 	s_idx = idx = cb->args[1];
1833 	s_ip_idx = cb->args[2];
1834 
1835 	if (cb->strict_check) {
1836 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1837 						 skb->sk, cb);
1838 		if (err < 0)
1839 			goto put_tgt_net;
1840 
1841 		err = 0;
1842 		if (fillargs.ifindex) {
1843 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1844 			if (!dev) {
1845 				err = -ENODEV;
1846 				goto put_tgt_net;
1847 			}
1848 
1849 			in_dev = __in_dev_get_rtnl(dev);
1850 			if (in_dev) {
1851 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1852 						       &fillargs);
1853 			}
1854 			goto put_tgt_net;
1855 		}
1856 	}
1857 
1858 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1859 		idx = 0;
1860 		head = &tgt_net->dev_index_head[h];
1861 		rcu_read_lock();
1862 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1863 			  tgt_net->dev_base_seq;
1864 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1865 			if (idx < s_idx)
1866 				goto cont;
1867 			if (h > s_h || idx > s_idx)
1868 				s_ip_idx = 0;
1869 			in_dev = __in_dev_get_rcu(dev);
1870 			if (!in_dev)
1871 				goto cont;
1872 
1873 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1874 					       &fillargs);
1875 			if (err < 0) {
1876 				rcu_read_unlock();
1877 				goto done;
1878 			}
1879 cont:
1880 			idx++;
1881 		}
1882 		rcu_read_unlock();
1883 	}
1884 
1885 done:
1886 	cb->args[0] = h;
1887 	cb->args[1] = idx;
1888 put_tgt_net:
1889 	if (fillargs.netnsid >= 0)
1890 		put_net(tgt_net);
1891 
1892 	return skb->len ? : err;
1893 }
1894 
1895 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1896 		      u32 portid)
1897 {
1898 	struct inet_fill_args fillargs = {
1899 		.portid = portid,
1900 		.seq = nlh ? nlh->nlmsg_seq : 0,
1901 		.event = event,
1902 		.flags = 0,
1903 		.netnsid = -1,
1904 	};
1905 	struct sk_buff *skb;
1906 	int err = -ENOBUFS;
1907 	struct net *net;
1908 
1909 	net = dev_net(ifa->ifa_dev->dev);
1910 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1911 	if (!skb)
1912 		goto errout;
1913 
1914 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1915 	if (err < 0) {
1916 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1917 		WARN_ON(err == -EMSGSIZE);
1918 		kfree_skb(skb);
1919 		goto errout;
1920 	}
1921 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1922 	return;
1923 errout:
1924 	if (err < 0)
1925 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1926 }
1927 
1928 static size_t inet_get_link_af_size(const struct net_device *dev,
1929 				    u32 ext_filter_mask)
1930 {
1931 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1932 
1933 	if (!in_dev)
1934 		return 0;
1935 
1936 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1937 }
1938 
1939 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1940 			     u32 ext_filter_mask)
1941 {
1942 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1943 	struct nlattr *nla;
1944 	int i;
1945 
1946 	if (!in_dev)
1947 		return -ENODATA;
1948 
1949 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1950 	if (!nla)
1951 		return -EMSGSIZE;
1952 
1953 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1954 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1955 
1956 	return 0;
1957 }
1958 
/* Attribute policy handed to nla_parse_nested_deprecated() by
 * inet_validate_link_af(): IFLA_INET_CONF is declared as a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1962 
1963 static int inet_validate_link_af(const struct net_device *dev,
1964 				 const struct nlattr *nla,
1965 				 struct netlink_ext_ack *extack)
1966 {
1967 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1968 	int err, rem;
1969 
1970 	if (dev && !__in_dev_get_rtnl(dev))
1971 		return -EAFNOSUPPORT;
1972 
1973 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1974 					  inet_af_policy, extack);
1975 	if (err < 0)
1976 		return err;
1977 
1978 	if (tb[IFLA_INET_CONF]) {
1979 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1980 			int cfgid = nla_type(a);
1981 
1982 			if (nla_len(a) < 4)
1983 				return -EINVAL;
1984 
1985 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1986 				return -EINVAL;
1987 		}
1988 	}
1989 
1990 	return 0;
1991 }
1992 
1993 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1994 			    struct netlink_ext_ack *extack)
1995 {
1996 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1997 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1998 	int rem;
1999 
2000 	if (!in_dev)
2001 		return -EAFNOSUPPORT;
2002 
2003 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2004 		return -EINVAL;
2005 
2006 	if (tb[IFLA_INET_CONF]) {
2007 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2008 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2009 	}
2010 
2011 	return 0;
2012 }
2013 
2014 static int inet_netconf_msgsize_devconf(int type)
2015 {
2016 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2017 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2018 	bool all = false;
2019 
2020 	if (type == NETCONFA_ALL)
2021 		all = true;
2022 
2023 	if (all || type == NETCONFA_FORWARDING)
2024 		size += nla_total_size(4);
2025 	if (all || type == NETCONFA_RP_FILTER)
2026 		size += nla_total_size(4);
2027 	if (all || type == NETCONFA_MC_FORWARDING)
2028 		size += nla_total_size(4);
2029 	if (all || type == NETCONFA_BC_FORWARDING)
2030 		size += nla_total_size(4);
2031 	if (all || type == NETCONFA_PROXY_NEIGH)
2032 		size += nla_total_size(4);
2033 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2034 		size += nla_total_size(4);
2035 
2036 	return size;
2037 }
2038 
2039 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2040 				     struct ipv4_devconf *devconf, u32 portid,
2041 				     u32 seq, int event, unsigned int flags,
2042 				     int type)
2043 {
2044 	struct nlmsghdr  *nlh;
2045 	struct netconfmsg *ncm;
2046 	bool all = false;
2047 
2048 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2049 			flags);
2050 	if (!nlh)
2051 		return -EMSGSIZE;
2052 
2053 	if (type == NETCONFA_ALL)
2054 		all = true;
2055 
2056 	ncm = nlmsg_data(nlh);
2057 	ncm->ncm_family = AF_INET;
2058 
2059 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2060 		goto nla_put_failure;
2061 
2062 	if (!devconf)
2063 		goto out;
2064 
2065 	if ((all || type == NETCONFA_FORWARDING) &&
2066 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2067 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2068 		goto nla_put_failure;
2069 	if ((all || type == NETCONFA_RP_FILTER) &&
2070 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2071 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2072 		goto nla_put_failure;
2073 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2074 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2075 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2076 		goto nla_put_failure;
2077 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2078 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2079 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2080 		goto nla_put_failure;
2081 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2082 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2083 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2084 		goto nla_put_failure;
2085 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2086 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2087 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2088 		goto nla_put_failure;
2089 
2090 out:
2091 	nlmsg_end(skb, nlh);
2092 	return 0;
2093 
2094 nla_put_failure:
2095 	nlmsg_cancel(skb, nlh);
2096 	return -EMSGSIZE;
2097 }
2098 
2099 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2100 				 int ifindex, struct ipv4_devconf *devconf)
2101 {
2102 	struct sk_buff *skb;
2103 	int err = -ENOBUFS;
2104 
2105 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2106 	if (!skb)
2107 		goto errout;
2108 
2109 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2110 					event, 0, type);
2111 	if (err < 0) {
2112 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2113 		WARN_ON(err == -EMSGSIZE);
2114 		kfree_skb(skb);
2115 		goto errout;
2116 	}
2117 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2118 	return;
2119 errout:
2120 	if (err < 0)
2121 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2122 }
2123 
/* Attribute policy used by inet_netconf_valid_get_req() when parsing
 * netconf get requests.  Each listed attribute is given a .len of
 * sizeof(int); attributes not listed here have no entry.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2131 
2132 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2133 				      const struct nlmsghdr *nlh,
2134 				      struct nlattr **tb,
2135 				      struct netlink_ext_ack *extack)
2136 {
2137 	int i, err;
2138 
2139 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2140 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2141 		return -EINVAL;
2142 	}
2143 
2144 	if (!netlink_strict_get_check(skb))
2145 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2146 					      tb, NETCONFA_MAX,
2147 					      devconf_ipv4_policy, extack);
2148 
2149 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2150 					    tb, NETCONFA_MAX,
2151 					    devconf_ipv4_policy, extack);
2152 	if (err)
2153 		return err;
2154 
2155 	for (i = 0; i <= NETCONFA_MAX; i++) {
2156 		if (!tb[i])
2157 			continue;
2158 
2159 		switch (i) {
2160 		case NETCONFA_IFINDEX:
2161 			break;
2162 		default:
2163 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2164 			return -EINVAL;
2165 		}
2166 	}
2167 
2168 	return 0;
2169 }
2170 
2171 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2172 				    struct nlmsghdr *nlh,
2173 				    struct netlink_ext_ack *extack)
2174 {
2175 	struct net *net = sock_net(in_skb->sk);
2176 	struct nlattr *tb[NETCONFA_MAX+1];
2177 	struct sk_buff *skb;
2178 	struct ipv4_devconf *devconf;
2179 	struct in_device *in_dev;
2180 	struct net_device *dev;
2181 	int ifindex;
2182 	int err;
2183 
2184 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2185 	if (err)
2186 		goto errout;
2187 
2188 	err = -EINVAL;
2189 	if (!tb[NETCONFA_IFINDEX])
2190 		goto errout;
2191 
2192 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2193 	switch (ifindex) {
2194 	case NETCONFA_IFINDEX_ALL:
2195 		devconf = net->ipv4.devconf_all;
2196 		break;
2197 	case NETCONFA_IFINDEX_DEFAULT:
2198 		devconf = net->ipv4.devconf_dflt;
2199 		break;
2200 	default:
2201 		dev = __dev_get_by_index(net, ifindex);
2202 		if (!dev)
2203 			goto errout;
2204 		in_dev = __in_dev_get_rtnl(dev);
2205 		if (!in_dev)
2206 			goto errout;
2207 		devconf = &in_dev->cnf;
2208 		break;
2209 	}
2210 
2211 	err = -ENOBUFS;
2212 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2213 	if (!skb)
2214 		goto errout;
2215 
2216 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2217 					NETLINK_CB(in_skb).portid,
2218 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2219 					NETCONFA_ALL);
2220 	if (err < 0) {
2221 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2222 		WARN_ON(err == -EMSGSIZE);
2223 		kfree_skb(skb);
2224 		goto errout;
2225 	}
2226 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2227 errout:
2228 	return err;
2229 }
2230 
2231 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2232 				     struct netlink_callback *cb)
2233 {
2234 	const struct nlmsghdr *nlh = cb->nlh;
2235 	struct net *net = sock_net(skb->sk);
2236 	int h, s_h;
2237 	int idx, s_idx;
2238 	struct net_device *dev;
2239 	struct in_device *in_dev;
2240 	struct hlist_head *head;
2241 
2242 	if (cb->strict_check) {
2243 		struct netlink_ext_ack *extack = cb->extack;
2244 		struct netconfmsg *ncm;
2245 
2246 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2247 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2248 			return -EINVAL;
2249 		}
2250 
2251 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2252 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2253 			return -EINVAL;
2254 		}
2255 	}
2256 
2257 	s_h = cb->args[0];
2258 	s_idx = idx = cb->args[1];
2259 
2260 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2261 		idx = 0;
2262 		head = &net->dev_index_head[h];
2263 		rcu_read_lock();
2264 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2265 			  net->dev_base_seq;
2266 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2267 			if (idx < s_idx)
2268 				goto cont;
2269 			in_dev = __in_dev_get_rcu(dev);
2270 			if (!in_dev)
2271 				goto cont;
2272 
2273 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2274 						      &in_dev->cnf,
2275 						      NETLINK_CB(cb->skb).portid,
2276 						      nlh->nlmsg_seq,
2277 						      RTM_NEWNETCONF,
2278 						      NLM_F_MULTI,
2279 						      NETCONFA_ALL) < 0) {
2280 				rcu_read_unlock();
2281 				goto done;
2282 			}
2283 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2284 cont:
2285 			idx++;
2286 		}
2287 		rcu_read_unlock();
2288 	}
2289 	if (h == NETDEV_HASHENTRIES) {
2290 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2291 					      net->ipv4.devconf_all,
2292 					      NETLINK_CB(cb->skb).portid,
2293 					      nlh->nlmsg_seq,
2294 					      RTM_NEWNETCONF, NLM_F_MULTI,
2295 					      NETCONFA_ALL) < 0)
2296 			goto done;
2297 		else
2298 			h++;
2299 	}
2300 	if (h == NETDEV_HASHENTRIES + 1) {
2301 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2302 					      net->ipv4.devconf_dflt,
2303 					      NETLINK_CB(cb->skb).portid,
2304 					      nlh->nlmsg_seq,
2305 					      RTM_NEWNETCONF, NLM_F_MULTI,
2306 					      NETCONFA_ALL) < 0)
2307 			goto done;
2308 		else
2309 			h++;
2310 	}
2311 done:
2312 	cb->args[0] = h;
2313 	cb->args[1] = idx;
2314 
2315 	return skb->len;
2316 }
2317 
2318 #ifdef CONFIG_SYSCTL
2319 
2320 static void devinet_copy_dflt_conf(struct net *net, int i)
2321 {
2322 	struct net_device *dev;
2323 
2324 	rcu_read_lock();
2325 	for_each_netdev_rcu(net, dev) {
2326 		struct in_device *in_dev;
2327 
2328 		in_dev = __in_dev_get_rcu(dev);
2329 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2330 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2331 	}
2332 	rcu_read_unlock();
2333 }
2334 
2335 /* called with RTNL locked */
2336 static void inet_forward_change(struct net *net)
2337 {
2338 	struct net_device *dev;
2339 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2340 
2341 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2342 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2343 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344 				    NETCONFA_FORWARDING,
2345 				    NETCONFA_IFINDEX_ALL,
2346 				    net->ipv4.devconf_all);
2347 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2348 				    NETCONFA_FORWARDING,
2349 				    NETCONFA_IFINDEX_DEFAULT,
2350 				    net->ipv4.devconf_dflt);
2351 
2352 	for_each_netdev(net, dev) {
2353 		struct in_device *in_dev;
2354 
2355 		if (on)
2356 			dev_disable_lro(dev);
2357 
2358 		in_dev = __in_dev_get_rtnl(dev);
2359 		if (in_dev) {
2360 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2361 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2362 						    NETCONFA_FORWARDING,
2363 						    dev->ifindex, &in_dev->cnf);
2364 		}
2365 	}
2366 }
2367 
2368 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2369 {
2370 	if (cnf == net->ipv4.devconf_dflt)
2371 		return NETCONFA_IFINDEX_DEFAULT;
2372 	else if (cnf == net->ipv4.devconf_all)
2373 		return NETCONFA_IFINDEX_ALL;
2374 	else {
2375 		struct in_device *idev
2376 			= container_of(cnf, struct in_device, cnf);
2377 		return idev->dev->ifindex;
2378 	}
2379 }
2380 
2381 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2382 			     void *buffer, size_t *lenp, loff_t *ppos)
2383 {
2384 	int old_value = *(int *)ctl->data;
2385 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2386 	int new_value = *(int *)ctl->data;
2387 
2388 	if (write) {
2389 		struct ipv4_devconf *cnf = ctl->extra1;
2390 		struct net *net = ctl->extra2;
2391 		int i = (int *)ctl->data - cnf->data;
2392 		int ifindex;
2393 
2394 		set_bit(i, cnf->state);
2395 
2396 		if (cnf == net->ipv4.devconf_dflt)
2397 			devinet_copy_dflt_conf(net, i);
2398 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2399 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2400 			if ((new_value == 0) && (old_value != 0))
2401 				rt_cache_flush(net);
2402 
2403 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2404 		    new_value != old_value)
2405 			rt_cache_flush(net);
2406 
2407 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2408 		    new_value != old_value) {
2409 			ifindex = devinet_conf_ifindex(net, cnf);
2410 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2411 						    NETCONFA_RP_FILTER,
2412 						    ifindex, cnf);
2413 		}
2414 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2415 		    new_value != old_value) {
2416 			ifindex = devinet_conf_ifindex(net, cnf);
2417 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2418 						    NETCONFA_PROXY_NEIGH,
2419 						    ifindex, cnf);
2420 		}
2421 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2422 		    new_value != old_value) {
2423 			ifindex = devinet_conf_ifindex(net, cnf);
2424 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2425 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2426 						    ifindex, cnf);
2427 		}
2428 	}
2429 
2430 	return ret;
2431 }
2432 
2433 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2434 				  void *buffer, size_t *lenp, loff_t *ppos)
2435 {
2436 	int *valp = ctl->data;
2437 	int val = *valp;
2438 	loff_t pos = *ppos;
2439 	struct net *net = ctl->extra2;
2440 	int ret;
2441 
2442 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2443 		return -EPERM;
2444 
2445 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2446 
2447 	if (write && *valp != val) {
2448 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2449 			if (!rtnl_trylock()) {
2450 				/* Restore the original values before restarting */
2451 				*valp = val;
2452 				*ppos = pos;
2453 				return restart_syscall();
2454 			}
2455 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2456 				inet_forward_change(net);
2457 			} else {
2458 				struct ipv4_devconf *cnf = ctl->extra1;
2459 				struct in_device *idev =
2460 					container_of(cnf, struct in_device, cnf);
2461 				if (*valp)
2462 					dev_disable_lro(idev->dev);
2463 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2464 							    NETCONFA_FORWARDING,
2465 							    idev->dev->ifindex,
2466 							    cnf);
2467 			}
2468 			rtnl_unlock();
2469 			rt_cache_flush(net);
2470 		} else
2471 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2472 						    NETCONFA_FORWARDING,
2473 						    NETCONFA_IFINDEX_DEFAULT,
2474 						    net->ipv4.devconf_dflt);
2475 	}
2476 
2477 	return ret;
2478 }
2479 
2480 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2481 				void *buffer, size_t *lenp, loff_t *ppos)
2482 {
2483 	int *valp = ctl->data;
2484 	int val = *valp;
2485 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2486 	struct net *net = ctl->extra2;
2487 
2488 	if (write && *valp != val)
2489 		rt_cache_flush(net);
2490 
2491 	return ret;
2492 }
2493 
/*
 * Build one ctl_table entry for the IPV4_DEVCONF_<attr> slot of the
 * global ipv4_devconf template.  The data pointer addresses element
 * IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2516 
2517 static struct devinet_sysctl_table {
2518 	struct ctl_table_header *sysctl_header;
2519 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2520 } devinet_sysctl = {
2521 	.devinet_vars = {
2522 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2523 					     devinet_sysctl_forward),
2524 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2525 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2526 
2527 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2528 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2529 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2530 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2531 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2532 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2533 					"accept_source_route"),
2534 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2535 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2536 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2537 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2538 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2539 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2540 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2541 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2542 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2543 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2544 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2545 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2546 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2547 					"arp_evict_nocarrier"),
2548 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2549 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2550 					"force_igmp_version"),
2551 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2552 					"igmpv2_unsolicited_report_interval"),
2553 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2554 					"igmpv3_unsolicited_report_interval"),
2555 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2556 					"ignore_routes_with_linkdown"),
2557 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2558 					"drop_gratuitous_arp"),
2559 
2560 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2561 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2562 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2563 					      "promote_secondaries"),
2564 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2565 					      "route_localnet"),
2566 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2567 					      "drop_unicast_in_l2_multicast"),
2568 	},
2569 };
2570 
2571 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2572 				     int ifindex, struct ipv4_devconf *p)
2573 {
2574 	int i;
2575 	struct devinet_sysctl_table *t;
2576 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2577 
2578 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2579 	if (!t)
2580 		goto out;
2581 
2582 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2583 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2584 		t->devinet_vars[i].extra1 = p;
2585 		t->devinet_vars[i].extra2 = net;
2586 	}
2587 
2588 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2589 
2590 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2591 	if (!t->sysctl_header)
2592 		goto free;
2593 
2594 	p->sysctl = t;
2595 
2596 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2597 				    ifindex, p);
2598 	return 0;
2599 
2600 free:
2601 	kfree(t);
2602 out:
2603 	return -ENOMEM;
2604 }
2605 
2606 static void __devinet_sysctl_unregister(struct net *net,
2607 					struct ipv4_devconf *cnf, int ifindex)
2608 {
2609 	struct devinet_sysctl_table *t = cnf->sysctl;
2610 
2611 	if (t) {
2612 		cnf->sysctl = NULL;
2613 		unregister_net_sysctl_table(t->sysctl_header);
2614 		kfree(t);
2615 	}
2616 
2617 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2618 }
2619 
2620 static int devinet_sysctl_register(struct in_device *idev)
2621 {
2622 	int err;
2623 
2624 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2625 		return -EINVAL;
2626 
2627 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2628 	if (err)
2629 		return err;
2630 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2631 					idev->dev->ifindex, &idev->cnf);
2632 	if (err)
2633 		neigh_sysctl_unregister(idev->arp_parms);
2634 	return err;
2635 }
2636 
2637 static void devinet_sysctl_unregister(struct in_device *idev)
2638 {
2639 	struct net *net = dev_net(idev->dev);
2640 
2641 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2642 	neigh_sysctl_unregister(idev->arp_parms);
2643 }
2644 
2645 static struct ctl_table ctl_forward_entry[] = {
2646 	{
2647 		.procname	= "ip_forward",
2648 		.data		= &ipv4_devconf.data[
2649 					IPV4_DEVCONF_FORWARDING - 1],
2650 		.maxlen		= sizeof(int),
2651 		.mode		= 0644,
2652 		.proc_handler	= devinet_sysctl_forward,
2653 		.extra1		= &ipv4_devconf,
2654 		.extra2		= &init_net,
2655 	},
2656 	{ },
2657 };
2658 #endif
2659 
2660 static __net_init int devinet_init_net(struct net *net)
2661 {
2662 	int err;
2663 	struct ipv4_devconf *all, *dflt;
2664 #ifdef CONFIG_SYSCTL
2665 	struct ctl_table *tbl;
2666 	struct ctl_table_header *forw_hdr;
2667 #endif
2668 
2669 	err = -ENOMEM;
2670 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2671 	if (!all)
2672 		goto err_alloc_all;
2673 
2674 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2675 	if (!dflt)
2676 		goto err_alloc_dflt;
2677 
2678 #ifdef CONFIG_SYSCTL
2679 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2680 	if (!tbl)
2681 		goto err_alloc_ctl;
2682 
2683 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2684 	tbl[0].extra1 = all;
2685 	tbl[0].extra2 = net;
2686 #endif
2687 
2688 	if (!net_eq(net, &init_net)) {
2689 		switch (net_inherit_devconf()) {
2690 		case 3:
2691 			/* copy from the current netns */
2692 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2693 			       sizeof(ipv4_devconf));
2694 			memcpy(dflt,
2695 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2696 			       sizeof(ipv4_devconf_dflt));
2697 			break;
2698 		case 0:
2699 		case 1:
2700 			/* copy from init_net */
2701 			memcpy(all, init_net.ipv4.devconf_all,
2702 			       sizeof(ipv4_devconf));
2703 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2704 			       sizeof(ipv4_devconf_dflt));
2705 			break;
2706 		case 2:
2707 			/* use compiled values */
2708 			break;
2709 		}
2710 	}
2711 
2712 #ifdef CONFIG_SYSCTL
2713 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2714 	if (err < 0)
2715 		goto err_reg_all;
2716 
2717 	err = __devinet_sysctl_register(net, "default",
2718 					NETCONFA_IFINDEX_DEFAULT, dflt);
2719 	if (err < 0)
2720 		goto err_reg_dflt;
2721 
2722 	err = -ENOMEM;
2723 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2724 	if (!forw_hdr)
2725 		goto err_reg_ctl;
2726 	net->ipv4.forw_hdr = forw_hdr;
2727 #endif
2728 
2729 	net->ipv4.devconf_all = all;
2730 	net->ipv4.devconf_dflt = dflt;
2731 	return 0;
2732 
2733 #ifdef CONFIG_SYSCTL
2734 err_reg_ctl:
2735 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2736 err_reg_dflt:
2737 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2738 err_reg_all:
2739 	kfree(tbl);
2740 err_alloc_ctl:
2741 #endif
2742 	kfree(dflt);
2743 err_alloc_dflt:
2744 	kfree(all);
2745 err_alloc_all:
2746 	return err;
2747 }
2748 
2749 static __net_exit void devinet_exit_net(struct net *net)
2750 {
2751 #ifdef CONFIG_SYSCTL
2752 	struct ctl_table *tbl;
2753 
2754 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2755 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2756 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2757 				    NETCONFA_IFINDEX_DEFAULT);
2758 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2759 				    NETCONFA_IFINDEX_ALL);
2760 	kfree(tbl);
2761 #endif
2762 	kfree(net->ipv4.devconf_dflt);
2763 	kfree(net->ipv4.devconf_all);
2764 }
2765 
2766 static __net_initdata struct pernet_operations devinet_ops = {
2767 	.init = devinet_init_net,
2768 	.exit = devinet_exit_net,
2769 };
2770 
2771 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2772 	.family		  = AF_INET,
2773 	.fill_link_af	  = inet_fill_link_af,
2774 	.get_link_af_size = inet_get_link_af_size,
2775 	.validate_link_af = inet_validate_link_af,
2776 	.set_link_af	  = inet_set_link_af,
2777 };
2778 
2779 void __init devinet_init(void)
2780 {
2781 	int i;
2782 
2783 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2784 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2785 
2786 	register_pernet_subsys(&devinet_ops);
2787 	register_netdevice_notifier(&ip_netdev_notifier);
2788 
2789 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2790 
2791 	rtnl_af_register(&inet_af_ops);
2792 
2793 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2794 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2795 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2796 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2797 		      inet_netconf_dump_devconf, 0);
2798 }
2799