xref: /openbmc/linux/net/ipv4/devinet.c (revision 3bf90eca)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that are cleared from ifa_flags whenever an IPv4 address
 * is inserted (see __inet_insert_ifa()).
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/* Built-in devconf table.  Slots are indexed by IPV4_DEVCONF_<name> - 1;
 * slots not listed here are zero-initialized.
 * NOTE(review): the consumer of this table is not visible in this part of
 * the file -- confirm how it is wired into the per-namespace config.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
81 
/* Built-in "default" devconf table.  Same entries as ipv4_devconf, plus
 * ACCEPT_SOURCE_ROUTE enabled.  Slots are indexed by IPV4_DEVCONF_<name> - 1.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt, which
 * inetdev_init() copies into every new in_device -- confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
94 
/* Read devconf attribute @attr from the namespace's default table
 * (net->ipv4.devconf_dflt).
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
/* Netlink attribute policy for IFA_* attributes; passed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr().
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	/* label must fit in IFNAMSIZ including the terminating NUL */
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFA_PROTO]		= { .type = NLA_U8 },
};
109 
/* Argument bundle for building address netlink messages.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the per-field notes below are inferred from the names -- confirm against
 * the fill routine.
 */
struct inet_fill_args {
	u32 portid;		/* presumably the netlink port id of the requester */
	u32 seq;		/* presumably the netlink sequence number */
	int event;		/* presumably the RTM_* message type */
	unsigned int flags;	/* presumably NLM_F_* message flags */
	int netnsid;		/* presumably the target netns id */
	int ifindex;		/* presumably the interface index */
};
118 
/* Hash table of configured IPv4 addresses: 2^IN4_ADDR_HSIZE_SHIFT buckets,
 * indexed by inet_addr_hash().  The table is file-global; lookups compare
 * the owning namespace explicitly (see inet_lookup_ifaddr_rcu()).
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
/* Unlink @ifa from the address hash table.  Must be called with RTNL held. */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
/* Forward declaration: sends an address netlink notification for @event. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notified with NETDEV_UP / NETDEV_DOWN when an address is added/removed. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Consulted in __inet_insert_ifa() before a new address is committed. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
/* Forward declaration; defined after __inet_del_ifa(). */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
/* With sysctl support the real implementations are defined elsewhere in
 * this file; only the prototypes are needed here.
 */
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without sysctl support registration is a no-op that always succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
/* Free @ifa via call_rcu(); the actual release happens in
 * inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
236 
/* RCU callback: free the in_device's mc_hash and then the in_device. */
static void in_dev_free_rcu(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);

	/* condition "1": no lockdep check is applied to this dereference */
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
	kfree(idev);
}
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
/* Remove one address from a device's address list.
 *
 * @in_dev:  device state owning the address
 * @ifap:    the link slot (list head or a previous entry's ifa_next) that
 *           currently points at the address to delete
 * @destroy: when non-zero the deleted address is freed here; otherwise the
 *           caller remains responsible for freeing it
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * When a primary address is deleted, matching secondaries on the same
 * subnet are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES is
 * set, the first one is promoted to primary.  Must be called with RTNL
 * held.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;	/* secondary chosen for promotion */
	struct in_ifaddr *ifa, *ifa1;		/* ifa1: the address being deleted */
	struct in_ifaddr *last_prim;		/* entry after which to re-insert */
	struct in_ifaddr *prev_prom = NULL;	/* entry preceding 'promote' */
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = rtnl_dereference(in_dev->ifa_list);
	/* A dead device is being torn down: just unlink, no promotion. */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the last primary whose scope is not
			 * narrower than ifa1's.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Delete the secondary together with its
				 * primary.
				 */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Unlink 'promote' from its current position ... */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			/* ... and re-insert it right after last_prim. */
			last_sec = rtnl_dereference(last_prim->ifa_next);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(last_prim->ifa_next, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
464 
/* Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request (nlh = NULL, portid = 0).
 */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
471 
/* Forward declaration of the address-lifetime work handler. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6 only flags to IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				inet_free_ifa(ifa);
513 				return -EINVAL;
514 			}
515 			ifa->ifa_flags |= IFA_F_SECONDARY;
516 		}
517 
518 		ifap = &ifa1->ifa_next;
519 		ifa1 = rtnl_dereference(*ifap);
520 	}
521 
522 	/* Allow any devices that wish to register ifaddr validtors to weigh
523 	 * in now, before changes are committed.  The rntl lock is serializing
524 	 * access here, so the state should not change between a validator call
525 	 * and a final notify on commit.  This isn't invoked on promotion under
526 	 * the assumption that validators are checking the address itself, and
527 	 * not the flags.
528 	 */
529 	ivi.ivi_addr = ifa->ifa_address;
530 	ivi.ivi_dev = ifa->ifa_dev;
531 	ivi.extack = extack;
532 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
533 					   NETDEV_UP, &ivi);
534 	ret = notifier_to_errno(ret);
535 	if (ret) {
536 		inet_free_ifa(ifa);
537 		return ret;
538 	}
539 
540 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
541 		ifap = last_primary;
542 
543 	rcu_assign_pointer(ifa->ifa_next, *ifap);
544 	rcu_assign_pointer(*ifap, ifa);
545 
546 	inet_hash_insert(dev_net(in_dev->dev), ifa);
547 
548 	cancel_delayed_work(&check_lifetime_work);
549 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
550 
551 	/* Send message first, then call notifier.
552 	   Notifier will trigger FIB update, so that
553 	   listeners of netlink will know about new ifaddr */
554 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
555 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
556 
557 	return 0;
558 }
559 
/* Convenience wrapper around __inet_insert_ifa() for callers that have no
 * originating netlink request (no nlh, portid 0, no extack).
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
564 
565 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
566 {
567 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
568 
569 	ASSERT_RTNL();
570 
571 	if (!in_dev) {
572 		inet_free_ifa(ifa);
573 		return -ENOBUFS;
574 	}
575 	ipv4_devconf_setall(in_dev);
576 	neigh_parms_data_state_setall(in_dev->arp_parms);
577 	if (ifa->ifa_dev != in_dev) {
578 		WARN_ON(ifa->ifa_dev);
579 		in_dev_hold(in_dev);
580 		ifa->ifa_dev = in_dev;
581 	}
582 	if (ipv4_is_loopback(ifa->ifa_local))
583 		ifa->ifa_scope = RT_SCOPE_HOST;
584 	return inet_insert_ifa(ifa);
585 }
586 
587 /* Caller must hold RCU or RTNL :
588  * We dont take a reference on found in_device
589  */
590 struct in_device *inetdev_by_index(struct net *net, int ifindex)
591 {
592 	struct net_device *dev;
593 	struct in_device *in_dev = NULL;
594 
595 	rcu_read_lock();
596 	dev = dev_get_by_index_rcu(net, ifindex);
597 	if (dev)
598 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
599 	rcu_read_unlock();
600 	return in_dev;
601 }
602 EXPORT_SYMBOL(inetdev_by_index);
603 
604 /* Called only from RTNL semaphored context. No locks. */
605 
606 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
607 				    __be32 mask)
608 {
609 	struct in_ifaddr *ifa;
610 
611 	ASSERT_RTNL();
612 
613 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
614 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
615 			return ifa;
616 	}
617 	return NULL;
618 }
619 
620 static int ip_mc_autojoin_config(struct net *net, bool join,
621 				 const struct in_ifaddr *ifa)
622 {
623 #if defined(CONFIG_IP_MULTICAST)
624 	struct ip_mreqn mreq = {
625 		.imr_multiaddr.s_addr = ifa->ifa_address,
626 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
627 	};
628 	struct sock *sk = net->ipv4.mc_autojoin_sk;
629 	int ret;
630 
631 	ASSERT_RTNL();
632 
633 	lock_sock(sk);
634 	if (join)
635 		ret = ip_mc_join_group(sk, &mreq);
636 	else
637 		ret = ip_mc_leave_group(sk, &mreq);
638 	release_sock(sk);
639 
640 	return ret;
641 #else
642 	return -EOPNOTSUPP;
643 #endif
644 }
645 
646 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
647 			    struct netlink_ext_ack *extack)
648 {
649 	struct net *net = sock_net(skb->sk);
650 	struct in_ifaddr __rcu **ifap;
651 	struct nlattr *tb[IFA_MAX+1];
652 	struct in_device *in_dev;
653 	struct ifaddrmsg *ifm;
654 	struct in_ifaddr *ifa;
655 	int err;
656 
657 	ASSERT_RTNL();
658 
659 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
660 				     ifa_ipv4_policy, extack);
661 	if (err < 0)
662 		goto errout;
663 
664 	ifm = nlmsg_data(nlh);
665 	in_dev = inetdev_by_index(net, ifm->ifa_index);
666 	if (!in_dev) {
667 		err = -ENODEV;
668 		goto errout;
669 	}
670 
671 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
672 	     ifap = &ifa->ifa_next) {
673 		if (tb[IFA_LOCAL] &&
674 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
675 			continue;
676 
677 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
678 			continue;
679 
680 		if (tb[IFA_ADDRESS] &&
681 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
682 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
683 			continue;
684 
685 		if (ipv4_is_multicast(ifa->ifa_address))
686 			ip_mc_autojoin_config(net, false, ifa);
687 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
688 		return 0;
689 	}
690 
691 	err = -EADDRNOTAVAIL;
692 errout:
693 	return err;
694 }
695 
/* Lifetime value that is treated as "never expires" by the checks below. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF

/* Delayed-work handler that enforces address lifetimes.
 *
 * For every bucket of inet_addr_lst it first scans under RCU to decide
 * whether anything has to change and to compute the next wake-up time;
 * only if a change is needed does it take RTNL and rescan the bucket,
 * deleting addresses whose valid lifetime has elapsed and flagging those
 * whose preferred lifetime has elapsed as IFA_F_DEPRECATED.  Finally the
 * work re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Latest time at which the next check will run. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan under RCU. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* valid lifetime elapsed: must be deleted */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* preferred lifetime elapsed: wake up no
				 * later than the end of the valid lifetime
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* wake up when the preferred lifetime ends */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply the changes with RTNL held. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* Find the link slot pointing at ifa in its
				 * device list, as inet_del_ifa() needs it.
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
					if (tmp == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
799 
800 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
801 			     __u32 prefered_lft)
802 {
803 	unsigned long timeout;
804 
805 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
806 
807 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
808 	if (addrconf_finite_timeout(timeout))
809 		ifa->ifa_valid_lft = timeout;
810 	else
811 		ifa->ifa_flags |= IFA_F_PERMANENT;
812 
813 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
814 	if (addrconf_finite_timeout(timeout)) {
815 		if (timeout == 0)
816 			ifa->ifa_flags |= IFA_F_DEPRECATED;
817 		ifa->ifa_preferred_lft = timeout;
818 	}
819 	ifa->ifa_tstamp = jiffies;
820 	if (!ifa->ifa_cstamp)
821 		ifa->ifa_cstamp = ifa->ifa_tstamp;
822 }
823 
824 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
825 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
826 				       struct netlink_ext_ack *extack)
827 {
828 	struct nlattr *tb[IFA_MAX+1];
829 	struct in_ifaddr *ifa;
830 	struct ifaddrmsg *ifm;
831 	struct net_device *dev;
832 	struct in_device *in_dev;
833 	int err;
834 
835 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
836 				     ifa_ipv4_policy, extack);
837 	if (err < 0)
838 		goto errout;
839 
840 	ifm = nlmsg_data(nlh);
841 	err = -EINVAL;
842 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
843 		goto errout;
844 
845 	dev = __dev_get_by_index(net, ifm->ifa_index);
846 	err = -ENODEV;
847 	if (!dev)
848 		goto errout;
849 
850 	in_dev = __in_dev_get_rtnl(dev);
851 	err = -ENOBUFS;
852 	if (!in_dev)
853 		goto errout;
854 
855 	ifa = inet_alloc_ifa();
856 	if (!ifa)
857 		/*
858 		 * A potential indev allocation can be left alive, it stays
859 		 * assigned to its device and is destroy with it.
860 		 */
861 		goto errout;
862 
863 	ipv4_devconf_setall(in_dev);
864 	neigh_parms_data_state_setall(in_dev->arp_parms);
865 	in_dev_hold(in_dev);
866 
867 	if (!tb[IFA_ADDRESS])
868 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
869 
870 	INIT_HLIST_NODE(&ifa->hash);
871 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
872 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
873 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
874 					 ifm->ifa_flags;
875 	ifa->ifa_scope = ifm->ifa_scope;
876 	ifa->ifa_dev = in_dev;
877 
878 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
879 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
880 
881 	if (tb[IFA_BROADCAST])
882 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
883 
884 	if (tb[IFA_LABEL])
885 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
886 	else
887 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
888 
889 	if (tb[IFA_RT_PRIORITY])
890 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
891 
892 	if (tb[IFA_PROTO])
893 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
894 
895 	if (tb[IFA_CACHEINFO]) {
896 		struct ifa_cacheinfo *ci;
897 
898 		ci = nla_data(tb[IFA_CACHEINFO]);
899 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
900 			err = -EINVAL;
901 			goto errout_free;
902 		}
903 		*pvalid_lft = ci->ifa_valid;
904 		*pprefered_lft = ci->ifa_prefered;
905 	}
906 
907 	return ifa;
908 
909 errout_free:
910 	inet_free_ifa(ifa);
911 errout:
912 	return ERR_PTR(err);
913 }
914 
915 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
916 {
917 	struct in_device *in_dev = ifa->ifa_dev;
918 	struct in_ifaddr *ifa1;
919 
920 	if (!ifa->ifa_local)
921 		return NULL;
922 
923 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
924 		if (ifa1->ifa_mask == ifa->ifa_mask &&
925 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
926 		    ifa1->ifa_local == ifa->ifa_local)
927 			return ifa1;
928 	}
929 	return NULL;
930 }
931 
932 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
933 			    struct netlink_ext_ack *extack)
934 {
935 	struct net *net = sock_net(skb->sk);
936 	struct in_ifaddr *ifa;
937 	struct in_ifaddr *ifa_existing;
938 	__u32 valid_lft = INFINITY_LIFE_TIME;
939 	__u32 prefered_lft = INFINITY_LIFE_TIME;
940 
941 	ASSERT_RTNL();
942 
943 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
944 	if (IS_ERR(ifa))
945 		return PTR_ERR(ifa);
946 
947 	ifa_existing = find_matching_ifa(ifa);
948 	if (!ifa_existing) {
949 		/* It would be best to check for !NLM_F_CREATE here but
950 		 * userspace already relies on not having to provide this.
951 		 */
952 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
953 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
954 			int ret = ip_mc_autojoin_config(net, true, ifa);
955 
956 			if (ret < 0) {
957 				inet_free_ifa(ifa);
958 				return ret;
959 			}
960 		}
961 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
962 					 extack);
963 	} else {
964 		u32 new_metric = ifa->ifa_rt_priority;
965 
966 		inet_free_ifa(ifa);
967 
968 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
969 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
970 			return -EEXIST;
971 		ifa = ifa_existing;
972 
973 		if (ifa->ifa_rt_priority != new_metric) {
974 			fib_modify_prefix_metric(ifa, new_metric);
975 			ifa->ifa_rt_priority = new_metric;
976 		}
977 
978 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
979 		cancel_delayed_work(&check_lifetime_work);
980 		queue_delayed_work(system_power_efficient_wq,
981 				&check_lifetime_work, 0);
982 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
983 	}
984 	return 0;
985 }
986 
987 /*
988  *	Determine a default network mask, based on the IP address.
989  */
990 
991 static int inet_abc_len(__be32 addr)
992 {
993 	int rc = -1;	/* Something else, probably a multicast. */
994 
995 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
996 		rc = 0;
997 	else {
998 		__u32 haddr = ntohl(addr);
999 		if (IN_CLASSA(haddr))
1000 			rc = 8;
1001 		else if (IN_CLASSB(haddr))
1002 			rc = 16;
1003 		else if (IN_CLASSC(haddr))
1004 			rc = 24;
1005 		else if (IN_CLASSE(haddr))
1006 			rc = 32;
1007 	}
1008 
1009 	return rc;
1010 }
1011 
1012 
1013 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1014 {
1015 	struct sockaddr_in sin_orig;
1016 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1017 	struct in_ifaddr __rcu **ifap = NULL;
1018 	struct in_device *in_dev;
1019 	struct in_ifaddr *ifa = NULL;
1020 	struct net_device *dev;
1021 	char *colon;
1022 	int ret = -EFAULT;
1023 	int tryaddrmatch = 0;
1024 
1025 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1026 
1027 	/* save original address for comparison */
1028 	memcpy(&sin_orig, sin, sizeof(*sin));
1029 
1030 	colon = strchr(ifr->ifr_name, ':');
1031 	if (colon)
1032 		*colon = 0;
1033 
1034 	dev_load(net, ifr->ifr_name);
1035 
1036 	switch (cmd) {
1037 	case SIOCGIFADDR:	/* Get interface address */
1038 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1039 	case SIOCGIFDSTADDR:	/* Get the destination address */
1040 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1041 		/* Note that these ioctls will not sleep,
1042 		   so that we do not impose a lock.
1043 		   One day we will be forced to put shlock here (I mean SMP)
1044 		 */
1045 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1046 		memset(sin, 0, sizeof(*sin));
1047 		sin->sin_family = AF_INET;
1048 		break;
1049 
1050 	case SIOCSIFFLAGS:
1051 		ret = -EPERM;
1052 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1053 			goto out;
1054 		break;
1055 	case SIOCSIFADDR:	/* Set interface address (and family) */
1056 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1057 	case SIOCSIFDSTADDR:	/* Set the destination address */
1058 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1059 		ret = -EPERM;
1060 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1061 			goto out;
1062 		ret = -EINVAL;
1063 		if (sin->sin_family != AF_INET)
1064 			goto out;
1065 		break;
1066 	default:
1067 		ret = -EINVAL;
1068 		goto out;
1069 	}
1070 
1071 	rtnl_lock();
1072 
1073 	ret = -ENODEV;
1074 	dev = __dev_get_by_name(net, ifr->ifr_name);
1075 	if (!dev)
1076 		goto done;
1077 
1078 	if (colon)
1079 		*colon = ':';
1080 
1081 	in_dev = __in_dev_get_rtnl(dev);
1082 	if (in_dev) {
1083 		if (tryaddrmatch) {
1084 			/* Matthias Andree */
1085 			/* compare label and address (4.4BSD style) */
1086 			/* note: we only do this for a limited set of ioctls
1087 			   and only if the original address family was AF_INET.
1088 			   This is checked above. */
1089 
1090 			for (ifap = &in_dev->ifa_list;
1091 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1092 			     ifap = &ifa->ifa_next) {
1093 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1094 				    sin_orig.sin_addr.s_addr ==
1095 							ifa->ifa_local) {
1096 					break; /* found */
1097 				}
1098 			}
1099 		}
1100 		/* we didn't get a match, maybe the application is
1101 		   4.3BSD-style and passed in junk so we fall back to
1102 		   comparing just the label */
1103 		if (!ifa) {
1104 			for (ifap = &in_dev->ifa_list;
1105 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1106 			     ifap = &ifa->ifa_next)
1107 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1108 					break;
1109 		}
1110 	}
1111 
1112 	ret = -EADDRNOTAVAIL;
1113 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1114 		goto done;
1115 
1116 	switch (cmd) {
1117 	case SIOCGIFADDR:	/* Get interface address */
1118 		ret = 0;
1119 		sin->sin_addr.s_addr = ifa->ifa_local;
1120 		break;
1121 
1122 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1123 		ret = 0;
1124 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1125 		break;
1126 
1127 	case SIOCGIFDSTADDR:	/* Get the destination address */
1128 		ret = 0;
1129 		sin->sin_addr.s_addr = ifa->ifa_address;
1130 		break;
1131 
1132 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1133 		ret = 0;
1134 		sin->sin_addr.s_addr = ifa->ifa_mask;
1135 		break;
1136 
1137 	case SIOCSIFFLAGS:
1138 		if (colon) {
1139 			ret = -EADDRNOTAVAIL;
1140 			if (!ifa)
1141 				break;
1142 			ret = 0;
1143 			if (!(ifr->ifr_flags & IFF_UP))
1144 				inet_del_ifa(in_dev, ifap, 1);
1145 			break;
1146 		}
1147 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1148 		break;
1149 
1150 	case SIOCSIFADDR:	/* Set interface address (and family) */
1151 		ret = -EINVAL;
1152 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1153 			break;
1154 
1155 		if (!ifa) {
1156 			ret = -ENOBUFS;
1157 			ifa = inet_alloc_ifa();
1158 			if (!ifa)
1159 				break;
1160 			INIT_HLIST_NODE(&ifa->hash);
1161 			if (colon)
1162 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1163 			else
1164 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1165 		} else {
1166 			ret = 0;
1167 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1168 				break;
1169 			inet_del_ifa(in_dev, ifap, 0);
1170 			ifa->ifa_broadcast = 0;
1171 			ifa->ifa_scope = 0;
1172 		}
1173 
1174 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1175 
1176 		if (!(dev->flags & IFF_POINTOPOINT)) {
1177 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1178 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1179 			if ((dev->flags & IFF_BROADCAST) &&
1180 			    ifa->ifa_prefixlen < 31)
1181 				ifa->ifa_broadcast = ifa->ifa_address |
1182 						     ~ifa->ifa_mask;
1183 		} else {
1184 			ifa->ifa_prefixlen = 32;
1185 			ifa->ifa_mask = inet_make_mask(32);
1186 		}
1187 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1188 		ret = inet_set_ifa(dev, ifa);
1189 		break;
1190 
1191 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1192 		ret = 0;
1193 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1194 			inet_del_ifa(in_dev, ifap, 0);
1195 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1196 			inet_insert_ifa(ifa);
1197 		}
1198 		break;
1199 
1200 	case SIOCSIFDSTADDR:	/* Set the destination address */
1201 		ret = 0;
1202 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1203 			break;
1204 		ret = -EINVAL;
1205 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1206 			break;
1207 		ret = 0;
1208 		inet_del_ifa(in_dev, ifap, 0);
1209 		ifa->ifa_address = sin->sin_addr.s_addr;
1210 		inet_insert_ifa(ifa);
1211 		break;
1212 
1213 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1214 
1215 		/*
1216 		 *	The mask we set must be legal.
1217 		 */
1218 		ret = -EINVAL;
1219 		if (bad_mask(sin->sin_addr.s_addr, 0))
1220 			break;
1221 		ret = 0;
1222 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1223 			__be32 old_mask = ifa->ifa_mask;
1224 			inet_del_ifa(in_dev, ifap, 0);
1225 			ifa->ifa_mask = sin->sin_addr.s_addr;
1226 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1227 
1228 			/* See if current broadcast address matches
1229 			 * with current netmask, then recalculate
1230 			 * the broadcast address. Otherwise it's a
1231 			 * funny address, so don't touch it since
1232 			 * the user seems to know what (s)he's doing...
1233 			 */
1234 			if ((dev->flags & IFF_BROADCAST) &&
1235 			    (ifa->ifa_prefixlen < 31) &&
1236 			    (ifa->ifa_broadcast ==
1237 			     (ifa->ifa_local|~old_mask))) {
1238 				ifa->ifa_broadcast = (ifa->ifa_local |
1239 						      ~sin->sin_addr.s_addr);
1240 			}
1241 			inet_insert_ifa(ifa);
1242 		}
1243 		break;
1244 	}
1245 done:
1246 	rtnl_unlock();
1247 out:
1248 	return ret;
1249 }
1250 
1251 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1252 {
1253 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1254 	const struct in_ifaddr *ifa;
1255 	struct ifreq ifr;
1256 	int done = 0;
1257 
1258 	if (WARN_ON(size > sizeof(struct ifreq)))
1259 		goto out;
1260 
1261 	if (!in_dev)
1262 		goto out;
1263 
1264 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1265 		if (!buf) {
1266 			done += size;
1267 			continue;
1268 		}
1269 		if (len < size)
1270 			break;
1271 		memset(&ifr, 0, sizeof(struct ifreq));
1272 		strcpy(ifr.ifr_name, ifa->ifa_label);
1273 
1274 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1275 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1276 								ifa->ifa_local;
1277 
1278 		if (copy_to_user(buf + done, &ifr, size)) {
1279 			done = -EFAULT;
1280 			break;
1281 		}
1282 		len  -= size;
1283 		done += size;
1284 	}
1285 out:
1286 	return done;
1287 }
1288 
1289 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1290 				 int scope)
1291 {
1292 	const struct in_ifaddr *ifa;
1293 
1294 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1295 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1296 			continue;
1297 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1298 		    ifa->ifa_scope <= scope)
1299 			return ifa->ifa_local;
1300 	}
1301 
1302 	return 0;
1303 }
1304 
1305 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1306 {
1307 	const struct in_ifaddr *ifa;
1308 	__be32 addr = 0;
1309 	unsigned char localnet_scope = RT_SCOPE_HOST;
1310 	struct in_device *in_dev;
1311 	struct net *net = dev_net(dev);
1312 	int master_idx;
1313 
1314 	rcu_read_lock();
1315 	in_dev = __in_dev_get_rcu(dev);
1316 	if (!in_dev)
1317 		goto no_in_dev;
1318 
1319 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1320 		localnet_scope = RT_SCOPE_LINK;
1321 
1322 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1323 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1324 			continue;
1325 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1326 			continue;
1327 		if (!dst || inet_ifa_match(dst, ifa)) {
1328 			addr = ifa->ifa_local;
1329 			break;
1330 		}
1331 		if (!addr)
1332 			addr = ifa->ifa_local;
1333 	}
1334 
1335 	if (addr)
1336 		goto out_unlock;
1337 no_in_dev:
1338 	master_idx = l3mdev_master_ifindex_rcu(dev);
1339 
1340 	/* For VRFs, the VRF device takes the place of the loopback device,
1341 	 * with addresses on it being preferred.  Note in such cases the
1342 	 * loopback device will be among the devices that fail the master_idx
1343 	 * equality check in the loop below.
1344 	 */
1345 	if (master_idx &&
1346 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1347 	    (in_dev = __in_dev_get_rcu(dev))) {
1348 		addr = in_dev_select_addr(in_dev, scope);
1349 		if (addr)
1350 			goto out_unlock;
1351 	}
1352 
1353 	/* Not loopback addresses on loopback should be preferred
1354 	   in this case. It is important that lo is the first interface
1355 	   in dev_base list.
1356 	 */
1357 	for_each_netdev_rcu(net, dev) {
1358 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1359 			continue;
1360 
1361 		in_dev = __in_dev_get_rcu(dev);
1362 		if (!in_dev)
1363 			continue;
1364 
1365 		addr = in_dev_select_addr(in_dev, scope);
1366 		if (addr)
1367 			goto out_unlock;
1368 	}
1369 out_unlock:
1370 	rcu_read_unlock();
1371 	return addr;
1372 }
1373 EXPORT_SYMBOL(inet_select_addr);
1374 
/*
 * Walk the addresses of @in_dev and try to confirm a local address.
 *
 * Two things are tracked during the walk:
 *  - @addr: the first local address that equals @local (any address when
 *    @local is 0) and whose scope, capped by localnet_scope, does not exceed
 *    @scope.  When only @dst was given it may be replaced further down by
 *    the local address of the entry that matched @dst.
 *  - @same: whether an entry was accepted by inet_ifa_match() for both
 *    @local and @dst, a zero @local or @dst counting as accepted.
 *
 * localnet_scope is RT_SCOPE_LINK when IN_DEV_ROUTE_LOCALNET() is set for
 * @in_dev and RT_SCOPE_HOST otherwise.
 *
 * Returns @addr if @same is set when the walk ends, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* Pick up a candidate address if none has been chosen yet. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			/* A subnet match was already seen: nothing left to do. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Only the dst-without-local case needs the
				 * extra checks below.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1418 
1419 /*
1420  * Confirm that local IP address exists using wildcards:
1421  * - net: netns to check, cannot be NULL
1422  * - in_dev: only on this interface, NULL=any interface
1423  * - dst: only in the same subnet as dst, 0=any dst
1424  * - local: address, 0=autoselect the local address
1425  * - scope: maximum allowed scope value for the local address
1426  */
1427 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1428 			 __be32 dst, __be32 local, int scope)
1429 {
1430 	__be32 addr = 0;
1431 	struct net_device *dev;
1432 
1433 	if (in_dev)
1434 		return confirm_addr_indev(in_dev, dst, local, scope);
1435 
1436 	rcu_read_lock();
1437 	for_each_netdev_rcu(net, dev) {
1438 		in_dev = __in_dev_get_rcu(dev);
1439 		if (in_dev) {
1440 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1441 			if (addr)
1442 				break;
1443 		}
1444 	}
1445 	rcu_read_unlock();
1446 
1447 	return addr;
1448 }
1449 EXPORT_SYMBOL(inet_confirm_addr);
1450 
1451 /*
1452  *	Device notifier
1453  */
1454 
/*
 * Add @nb to inetaddr_chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1460 
/*
 * Remove @nb from inetaddr_chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1466 
/*
 * Add @nb to inetaddr_validator_chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1472 
/*
 * Remove @nb from inetaddr_validator_chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
	    nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1479 
1480 /* Rename ifa_labels for a device name change. Make some effort to preserve
1481  * existing alias numbering and to create unique labels if possible.
1482 */
1483 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1484 {
1485 	struct in_ifaddr *ifa;
1486 	int named = 0;
1487 
1488 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1489 		char old[IFNAMSIZ], *dot;
1490 
1491 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1492 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1493 		if (named++ == 0)
1494 			goto skip;
1495 		dot = strchr(old, ':');
1496 		if (!dot) {
1497 			sprintf(old, ":%d", named);
1498 			dot = old;
1499 		}
1500 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1501 			strcat(ifa->ifa_label, dot);
1502 		else
1503 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1504 skip:
1505 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1506 	}
1507 }
1508 
1509 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1510 					struct in_device *in_dev)
1511 
1512 {
1513 	const struct in_ifaddr *ifa;
1514 
1515 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1516 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1517 			 ifa->ifa_local, dev,
1518 			 ifa->ifa_local, NULL,
1519 			 dev->dev_addr, NULL);
1520 	}
1521 }
1522 
/*
 * Netdevice notifier callback: keeps the IPv4 per-device state
 * (struct in_device) in step with netdevice events.
 *
 * Called only under RTNL semaphore (asserted below).
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER; that error is passed back via notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No in_device yet: only REGISTER and CHANGEMTU can create one,
	 * every other event is ignored.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* An in_device already exists at registration time; this is
		 * reported as a bug and the device pointer is cleared.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* A loopback device gets INADDR_LOOPBACK/8 with host scope. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR send gratuitous ARP only when
		 * IN_DEV_ARP_NOTIFY() is set for the device.
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries after the rename. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1618 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1622 
1623 static size_t inet_nlmsg_size(void)
1624 {
1625 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1626 	       + nla_total_size(4) /* IFA_ADDRESS */
1627 	       + nla_total_size(4) /* IFA_LOCAL */
1628 	       + nla_total_size(4) /* IFA_BROADCAST */
1629 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1630 	       + nla_total_size(4)  /* IFA_FLAGS */
1631 	       + nla_total_size(1)  /* IFA_PROTO */
1632 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1633 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1634 }
1635 
1636 static inline u32 cstamp_delta(unsigned long cstamp)
1637 {
1638 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1639 }
1640 
1641 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1642 			 unsigned long tstamp, u32 preferred, u32 valid)
1643 {
1644 	struct ifa_cacheinfo ci;
1645 
1646 	ci.cstamp = cstamp_delta(cstamp);
1647 	ci.tstamp = cstamp_delta(tstamp);
1648 	ci.ifa_prefered = preferred;
1649 	ci.ifa_valid = valid;
1650 
1651 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1652 }
1653 
1654 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1655 			    struct inet_fill_args *args)
1656 {
1657 	struct ifaddrmsg *ifm;
1658 	struct nlmsghdr  *nlh;
1659 	u32 preferred, valid;
1660 
1661 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1662 			args->flags);
1663 	if (!nlh)
1664 		return -EMSGSIZE;
1665 
1666 	ifm = nlmsg_data(nlh);
1667 	ifm->ifa_family = AF_INET;
1668 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1669 	ifm->ifa_flags = ifa->ifa_flags;
1670 	ifm->ifa_scope = ifa->ifa_scope;
1671 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1672 
1673 	if (args->netnsid >= 0 &&
1674 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1675 		goto nla_put_failure;
1676 
1677 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1678 		preferred = ifa->ifa_preferred_lft;
1679 		valid = ifa->ifa_valid_lft;
1680 		if (preferred != INFINITY_LIFE_TIME) {
1681 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1682 
1683 			if (preferred > tval)
1684 				preferred -= tval;
1685 			else
1686 				preferred = 0;
1687 			if (valid != INFINITY_LIFE_TIME) {
1688 				if (valid > tval)
1689 					valid -= tval;
1690 				else
1691 					valid = 0;
1692 			}
1693 		}
1694 	} else {
1695 		preferred = INFINITY_LIFE_TIME;
1696 		valid = INFINITY_LIFE_TIME;
1697 	}
1698 	if ((ifa->ifa_address &&
1699 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1700 	    (ifa->ifa_local &&
1701 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1702 	    (ifa->ifa_broadcast &&
1703 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1704 	    (ifa->ifa_label[0] &&
1705 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1706 	    (ifa->ifa_proto &&
1707 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1708 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1709 	    (ifa->ifa_rt_priority &&
1710 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1711 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1712 			  preferred, valid))
1713 		goto nla_put_failure;
1714 
1715 	nlmsg_end(skb, nlh);
1716 	return 0;
1717 
1718 nla_put_failure:
1719 	nlmsg_cancel(skb, nlh);
1720 	return -EMSGSIZE;
1721 }
1722 
1723 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1724 				      struct inet_fill_args *fillargs,
1725 				      struct net **tgt_net, struct sock *sk,
1726 				      struct netlink_callback *cb)
1727 {
1728 	struct netlink_ext_ack *extack = cb->extack;
1729 	struct nlattr *tb[IFA_MAX+1];
1730 	struct ifaddrmsg *ifm;
1731 	int err, i;
1732 
1733 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1734 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1735 		return -EINVAL;
1736 	}
1737 
1738 	ifm = nlmsg_data(nlh);
1739 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1740 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1741 		return -EINVAL;
1742 	}
1743 
1744 	fillargs->ifindex = ifm->ifa_index;
1745 	if (fillargs->ifindex) {
1746 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1747 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1748 	}
1749 
1750 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1751 					    ifa_ipv4_policy, extack);
1752 	if (err < 0)
1753 		return err;
1754 
1755 	for (i = 0; i <= IFA_MAX; ++i) {
1756 		if (!tb[i])
1757 			continue;
1758 
1759 		if (i == IFA_TARGET_NETNSID) {
1760 			struct net *net;
1761 
1762 			fillargs->netnsid = nla_get_s32(tb[i]);
1763 
1764 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1765 			if (IS_ERR(net)) {
1766 				fillargs->netnsid = -1;
1767 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1768 				return PTR_ERR(net);
1769 			}
1770 			*tgt_net = net;
1771 		} else {
1772 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1773 			return -EINVAL;
1774 		}
1775 	}
1776 
1777 	return 0;
1778 }
1779 
1780 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1781 			    struct netlink_callback *cb, int s_ip_idx,
1782 			    struct inet_fill_args *fillargs)
1783 {
1784 	struct in_ifaddr *ifa;
1785 	int ip_idx = 0;
1786 	int err;
1787 
1788 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1789 		if (ip_idx < s_ip_idx) {
1790 			ip_idx++;
1791 			continue;
1792 		}
1793 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1794 		if (err < 0)
1795 			goto done;
1796 
1797 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1798 		ip_idx++;
1799 	}
1800 	err = 0;
1801 
1802 done:
1803 	cb->args[2] = ip_idx;
1804 
1805 	return err;
1806 }
1807 
/*
 * Netlink dump callback for IPv4 addresses.
 *
 * Resume state lives in cb->args[]: [0] is the dev_index_head bucket,
 * [1] the device position inside that bucket and [2] the address position
 * inside that device (written by in_dev_dump_addr()).
 *
 * With strict checking the request is validated first and may select a
 * target namespace and/or a single interface; in the single-interface case
 * only that device is dumped and the bucket walk is skipped.
 *
 * Returns skb->len when anything was written, otherwise the last error
 * (0 if there was none).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Filtered dump: handle just the requested interface. */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* s_idx only applies to the first bucket visited; it is reset to 0
	 * on every further bucket.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the device we resumed on: start from its
			 * first address.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/* netnsid is >= 0 only when the request validation obtained tgt_net
	 * through rtnl_get_net_ns_capable().
	 */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1891 
1892 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1893 		      u32 portid)
1894 {
1895 	struct inet_fill_args fillargs = {
1896 		.portid = portid,
1897 		.seq = nlh ? nlh->nlmsg_seq : 0,
1898 		.event = event,
1899 		.flags = 0,
1900 		.netnsid = -1,
1901 	};
1902 	struct sk_buff *skb;
1903 	int err = -ENOBUFS;
1904 	struct net *net;
1905 
1906 	net = dev_net(ifa->ifa_dev->dev);
1907 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1908 	if (!skb)
1909 		goto errout;
1910 
1911 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1912 	if (err < 0) {
1913 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1914 		WARN_ON(err == -EMSGSIZE);
1915 		kfree_skb(skb);
1916 		goto errout;
1917 	}
1918 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1919 	return;
1920 errout:
1921 	if (err < 0)
1922 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1923 }
1924 
1925 static size_t inet_get_link_af_size(const struct net_device *dev,
1926 				    u32 ext_filter_mask)
1927 {
1928 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1929 
1930 	if (!in_dev)
1931 		return 0;
1932 
1933 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1934 }
1935 
1936 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1937 			     u32 ext_filter_mask)
1938 {
1939 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1940 	struct nlattr *nla;
1941 	int i;
1942 
1943 	if (!in_dev)
1944 		return -ENODATA;
1945 
1946 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1947 	if (!nla)
1948 		return -EMSGSIZE;
1949 
1950 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1951 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1952 
1953 	return 0;
1954 }
1955 
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF is a
 * nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1959 
1960 static int inet_validate_link_af(const struct net_device *dev,
1961 				 const struct nlattr *nla,
1962 				 struct netlink_ext_ack *extack)
1963 {
1964 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1965 	int err, rem;
1966 
1967 	if (dev && !__in_dev_get_rtnl(dev))
1968 		return -EAFNOSUPPORT;
1969 
1970 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1971 					  inet_af_policy, extack);
1972 	if (err < 0)
1973 		return err;
1974 
1975 	if (tb[IFLA_INET_CONF]) {
1976 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1977 			int cfgid = nla_type(a);
1978 
1979 			if (nla_len(a) < 4)
1980 				return -EINVAL;
1981 
1982 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1983 				return -EINVAL;
1984 		}
1985 	}
1986 
1987 	return 0;
1988 }
1989 
1990 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1991 			    struct netlink_ext_ack *extack)
1992 {
1993 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1994 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1995 	int rem;
1996 
1997 	if (!in_dev)
1998 		return -EAFNOSUPPORT;
1999 
2000 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2001 		return -EINVAL;
2002 
2003 	if (tb[IFLA_INET_CONF]) {
2004 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2005 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2006 	}
2007 
2008 	return 0;
2009 }
2010 
2011 static int inet_netconf_msgsize_devconf(int type)
2012 {
2013 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2014 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2015 	bool all = false;
2016 
2017 	if (type == NETCONFA_ALL)
2018 		all = true;
2019 
2020 	if (all || type == NETCONFA_FORWARDING)
2021 		size += nla_total_size(4);
2022 	if (all || type == NETCONFA_RP_FILTER)
2023 		size += nla_total_size(4);
2024 	if (all || type == NETCONFA_MC_FORWARDING)
2025 		size += nla_total_size(4);
2026 	if (all || type == NETCONFA_BC_FORWARDING)
2027 		size += nla_total_size(4);
2028 	if (all || type == NETCONFA_PROXY_NEIGH)
2029 		size += nla_total_size(4);
2030 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2031 		size += nla_total_size(4);
2032 
2033 	return size;
2034 }
2035 
2036 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2037 				     struct ipv4_devconf *devconf, u32 portid,
2038 				     u32 seq, int event, unsigned int flags,
2039 				     int type)
2040 {
2041 	struct nlmsghdr  *nlh;
2042 	struct netconfmsg *ncm;
2043 	bool all = false;
2044 
2045 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2046 			flags);
2047 	if (!nlh)
2048 		return -EMSGSIZE;
2049 
2050 	if (type == NETCONFA_ALL)
2051 		all = true;
2052 
2053 	ncm = nlmsg_data(nlh);
2054 	ncm->ncm_family = AF_INET;
2055 
2056 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2057 		goto nla_put_failure;
2058 
2059 	if (!devconf)
2060 		goto out;
2061 
2062 	if ((all || type == NETCONFA_FORWARDING) &&
2063 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2064 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2065 		goto nla_put_failure;
2066 	if ((all || type == NETCONFA_RP_FILTER) &&
2067 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2068 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2069 		goto nla_put_failure;
2070 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2071 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2072 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2073 		goto nla_put_failure;
2074 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2075 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2076 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2077 		goto nla_put_failure;
2078 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2079 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2080 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2081 		goto nla_put_failure;
2082 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2083 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2084 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2085 		goto nla_put_failure;
2086 
2087 out:
2088 	nlmsg_end(skb, nlh);
2089 	return 0;
2090 
2091 nla_put_failure:
2092 	nlmsg_cancel(skb, nlh);
2093 	return -EMSGSIZE;
2094 }
2095 
2096 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2097 				 int ifindex, struct ipv4_devconf *devconf)
2098 {
2099 	struct sk_buff *skb;
2100 	int err = -ENOBUFS;
2101 
2102 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2103 	if (!skb)
2104 		goto errout;
2105 
2106 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2107 					event, 0, type);
2108 	if (err < 0) {
2109 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2110 		WARN_ON(err == -EMSGSIZE);
2111 		kfree_skb(skb);
2112 		goto errout;
2113 	}
2114 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2115 	return;
2116 errout:
2117 	if (err < 0)
2118 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2119 }
2120 
/* Attribute policy for IPv4 netconf requests: each attribute listed here
 * is declared with a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2128 
2129 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2130 				      const struct nlmsghdr *nlh,
2131 				      struct nlattr **tb,
2132 				      struct netlink_ext_ack *extack)
2133 {
2134 	int i, err;
2135 
2136 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2137 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2138 		return -EINVAL;
2139 	}
2140 
2141 	if (!netlink_strict_get_check(skb))
2142 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2143 					      tb, NETCONFA_MAX,
2144 					      devconf_ipv4_policy, extack);
2145 
2146 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2147 					    tb, NETCONFA_MAX,
2148 					    devconf_ipv4_policy, extack);
2149 	if (err)
2150 		return err;
2151 
2152 	for (i = 0; i <= NETCONFA_MAX; i++) {
2153 		if (!tb[i])
2154 			continue;
2155 
2156 		switch (i) {
2157 		case NETCONFA_IFINDEX:
2158 			break;
2159 		default:
2160 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2161 			return -EINVAL;
2162 		}
2163 	}
2164 
2165 	return 0;
2166 }
2167 
2168 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2169 				    struct nlmsghdr *nlh,
2170 				    struct netlink_ext_ack *extack)
2171 {
2172 	struct net *net = sock_net(in_skb->sk);
2173 	struct nlattr *tb[NETCONFA_MAX+1];
2174 	struct sk_buff *skb;
2175 	struct ipv4_devconf *devconf;
2176 	struct in_device *in_dev;
2177 	struct net_device *dev;
2178 	int ifindex;
2179 	int err;
2180 
2181 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2182 	if (err)
2183 		goto errout;
2184 
2185 	err = -EINVAL;
2186 	if (!tb[NETCONFA_IFINDEX])
2187 		goto errout;
2188 
2189 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2190 	switch (ifindex) {
2191 	case NETCONFA_IFINDEX_ALL:
2192 		devconf = net->ipv4.devconf_all;
2193 		break;
2194 	case NETCONFA_IFINDEX_DEFAULT:
2195 		devconf = net->ipv4.devconf_dflt;
2196 		break;
2197 	default:
2198 		dev = __dev_get_by_index(net, ifindex);
2199 		if (!dev)
2200 			goto errout;
2201 		in_dev = __in_dev_get_rtnl(dev);
2202 		if (!in_dev)
2203 			goto errout;
2204 		devconf = &in_dev->cnf;
2205 		break;
2206 	}
2207 
2208 	err = -ENOBUFS;
2209 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2210 	if (!skb)
2211 		goto errout;
2212 
2213 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2214 					NETLINK_CB(in_skb).portid,
2215 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2216 					NETCONFA_ALL);
2217 	if (err < 0) {
2218 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2219 		WARN_ON(err == -EMSGSIZE);
2220 		kfree_skb(skb);
2221 		goto errout;
2222 	}
2223 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2224 errout:
2225 	return err;
2226 }
2227 
2228 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2229 				     struct netlink_callback *cb)
2230 {
2231 	const struct nlmsghdr *nlh = cb->nlh;
2232 	struct net *net = sock_net(skb->sk);
2233 	int h, s_h;
2234 	int idx, s_idx;
2235 	struct net_device *dev;
2236 	struct in_device *in_dev;
2237 	struct hlist_head *head;
2238 
2239 	if (cb->strict_check) {
2240 		struct netlink_ext_ack *extack = cb->extack;
2241 		struct netconfmsg *ncm;
2242 
2243 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2244 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2245 			return -EINVAL;
2246 		}
2247 
2248 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2249 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2250 			return -EINVAL;
2251 		}
2252 	}
2253 
2254 	s_h = cb->args[0];
2255 	s_idx = idx = cb->args[1];
2256 
2257 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2258 		idx = 0;
2259 		head = &net->dev_index_head[h];
2260 		rcu_read_lock();
2261 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2262 			  net->dev_base_seq;
2263 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2264 			if (idx < s_idx)
2265 				goto cont;
2266 			in_dev = __in_dev_get_rcu(dev);
2267 			if (!in_dev)
2268 				goto cont;
2269 
2270 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2271 						      &in_dev->cnf,
2272 						      NETLINK_CB(cb->skb).portid,
2273 						      nlh->nlmsg_seq,
2274 						      RTM_NEWNETCONF,
2275 						      NLM_F_MULTI,
2276 						      NETCONFA_ALL) < 0) {
2277 				rcu_read_unlock();
2278 				goto done;
2279 			}
2280 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2281 cont:
2282 			idx++;
2283 		}
2284 		rcu_read_unlock();
2285 	}
2286 	if (h == NETDEV_HASHENTRIES) {
2287 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2288 					      net->ipv4.devconf_all,
2289 					      NETLINK_CB(cb->skb).portid,
2290 					      nlh->nlmsg_seq,
2291 					      RTM_NEWNETCONF, NLM_F_MULTI,
2292 					      NETCONFA_ALL) < 0)
2293 			goto done;
2294 		else
2295 			h++;
2296 	}
2297 	if (h == NETDEV_HASHENTRIES + 1) {
2298 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2299 					      net->ipv4.devconf_dflt,
2300 					      NETLINK_CB(cb->skb).portid,
2301 					      nlh->nlmsg_seq,
2302 					      RTM_NEWNETCONF, NLM_F_MULTI,
2303 					      NETCONFA_ALL) < 0)
2304 			goto done;
2305 		else
2306 			h++;
2307 	}
2308 done:
2309 	cb->args[0] = h;
2310 	cb->args[1] = idx;
2311 
2312 	return skb->len;
2313 }
2314 
2315 #ifdef CONFIG_SYSCTL
2316 
2317 static void devinet_copy_dflt_conf(struct net *net, int i)
2318 {
2319 	struct net_device *dev;
2320 
2321 	rcu_read_lock();
2322 	for_each_netdev_rcu(net, dev) {
2323 		struct in_device *in_dev;
2324 
2325 		in_dev = __in_dev_get_rcu(dev);
2326 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2327 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2328 	}
2329 	rcu_read_unlock();
2330 }
2331 
2332 /* called with RTNL locked */
2333 static void inet_forward_change(struct net *net)
2334 {
2335 	struct net_device *dev;
2336 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2337 
2338 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2339 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2340 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2341 				    NETCONFA_FORWARDING,
2342 				    NETCONFA_IFINDEX_ALL,
2343 				    net->ipv4.devconf_all);
2344 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2345 				    NETCONFA_FORWARDING,
2346 				    NETCONFA_IFINDEX_DEFAULT,
2347 				    net->ipv4.devconf_dflt);
2348 
2349 	for_each_netdev(net, dev) {
2350 		struct in_device *in_dev;
2351 
2352 		if (on)
2353 			dev_disable_lro(dev);
2354 
2355 		in_dev = __in_dev_get_rtnl(dev);
2356 		if (in_dev) {
2357 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2358 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2359 						    NETCONFA_FORWARDING,
2360 						    dev->ifindex, &in_dev->cnf);
2361 		}
2362 	}
2363 }
2364 
2365 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2366 {
2367 	if (cnf == net->ipv4.devconf_dflt)
2368 		return NETCONFA_IFINDEX_DEFAULT;
2369 	else if (cnf == net->ipv4.devconf_all)
2370 		return NETCONFA_IFINDEX_ALL;
2371 	else {
2372 		struct in_device *idev
2373 			= container_of(cnf, struct in_device, cnf);
2374 		return idev->dev->ifindex;
2375 	}
2376 }
2377 
2378 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2379 			     void *buffer, size_t *lenp, loff_t *ppos)
2380 {
2381 	int old_value = *(int *)ctl->data;
2382 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2383 	int new_value = *(int *)ctl->data;
2384 
2385 	if (write) {
2386 		struct ipv4_devconf *cnf = ctl->extra1;
2387 		struct net *net = ctl->extra2;
2388 		int i = (int *)ctl->data - cnf->data;
2389 		int ifindex;
2390 
2391 		set_bit(i, cnf->state);
2392 
2393 		if (cnf == net->ipv4.devconf_dflt)
2394 			devinet_copy_dflt_conf(net, i);
2395 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2396 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2397 			if ((new_value == 0) && (old_value != 0))
2398 				rt_cache_flush(net);
2399 
2400 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2401 		    new_value != old_value)
2402 			rt_cache_flush(net);
2403 
2404 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2405 		    new_value != old_value) {
2406 			ifindex = devinet_conf_ifindex(net, cnf);
2407 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2408 						    NETCONFA_RP_FILTER,
2409 						    ifindex, cnf);
2410 		}
2411 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2412 		    new_value != old_value) {
2413 			ifindex = devinet_conf_ifindex(net, cnf);
2414 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2415 						    NETCONFA_PROXY_NEIGH,
2416 						    ifindex, cnf);
2417 		}
2418 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2419 		    new_value != old_value) {
2420 			ifindex = devinet_conf_ifindex(net, cnf);
2421 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2422 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2423 						    ifindex, cnf);
2424 		}
2425 	}
2426 
2427 	return ret;
2428 }
2429 
2430 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2431 				  void *buffer, size_t *lenp, loff_t *ppos)
2432 {
2433 	int *valp = ctl->data;
2434 	int val = *valp;
2435 	loff_t pos = *ppos;
2436 	struct net *net = ctl->extra2;
2437 	int ret;
2438 
2439 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2440 		return -EPERM;
2441 
2442 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2443 
2444 	if (write && *valp != val) {
2445 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2446 			if (!rtnl_trylock()) {
2447 				/* Restore the original values before restarting */
2448 				*valp = val;
2449 				*ppos = pos;
2450 				return restart_syscall();
2451 			}
2452 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2453 				inet_forward_change(net);
2454 			} else {
2455 				struct ipv4_devconf *cnf = ctl->extra1;
2456 				struct in_device *idev =
2457 					container_of(cnf, struct in_device, cnf);
2458 				if (*valp)
2459 					dev_disable_lro(idev->dev);
2460 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2461 							    NETCONFA_FORWARDING,
2462 							    idev->dev->ifindex,
2463 							    cnf);
2464 			}
2465 			rtnl_unlock();
2466 			rt_cache_flush(net);
2467 		} else
2468 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2469 						    NETCONFA_FORWARDING,
2470 						    NETCONFA_IFINDEX_DEFAULT,
2471 						    net->ipv4.devconf_dflt);
2472 	}
2473 
2474 	return ret;
2475 }
2476 
2477 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2478 				void *buffer, size_t *lenp, loff_t *ppos)
2479 {
2480 	int *valp = ctl->data;
2481 	int val = *valp;
2482 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2483 	struct net *net = ctl->extra2;
2484 
2485 	if (write && *valp != val)
2486 		rt_cache_flush(net);
2487 
2488 	return ret;
2489 }
2490 
/*
 * Helpers for building the template ctl_table entries.
 *
 * DEVINET_SYSCTL_ENTRY emits one entry whose .data points at slot
 * IPV4_DEVCONF_<field> - 1 of the global ipv4_devconf block and whose
 * .extra1 points at that block; __devinet_sysctl_register() rebases
 * both onto the configuration block being registered.
 *
 * The remaining macros fix the mode and/or handler:
 *   RW       - 0644, devinet_conf_proc
 *   RO       - 0444, devinet_conf_proc
 *   COMPLEX  - 0644, caller-supplied handler
 *   FLUSHING - 0644, ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(field, label, perm, handler) \
	{ \
		.procname	= label, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## field - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(field, label) \
	DEVINET_SYSCTL_ENTRY(field, label, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(field, label) \
	DEVINET_SYSCTL_ENTRY(field, label, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(field, label, handler) \
	DEVINET_SYSCTL_ENTRY(field, label, 0644, handler)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(field, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(field, label, ipv4_doint_and_flush)
2513 
2514 static struct devinet_sysctl_table {
2515 	struct ctl_table_header *sysctl_header;
2516 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2517 } devinet_sysctl = {
2518 	.devinet_vars = {
2519 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2520 					     devinet_sysctl_forward),
2521 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2522 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2523 
2524 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2525 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2526 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2527 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2528 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2529 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2530 					"accept_source_route"),
2531 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2532 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2533 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2534 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2535 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2536 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2537 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2538 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2539 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2540 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2541 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2542 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2543 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2544 					"arp_evict_nocarrier"),
2545 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2546 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2547 					"force_igmp_version"),
2548 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2549 					"igmpv2_unsolicited_report_interval"),
2550 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2551 					"igmpv3_unsolicited_report_interval"),
2552 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2553 					"ignore_routes_with_linkdown"),
2554 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2555 					"drop_gratuitous_arp"),
2556 
2557 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2558 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2559 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2560 					      "promote_secondaries"),
2561 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2562 					      "route_localnet"),
2563 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2564 					      "drop_unicast_in_l2_multicast"),
2565 	},
2566 };
2567 
2568 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2569 				     int ifindex, struct ipv4_devconf *p)
2570 {
2571 	int i;
2572 	struct devinet_sysctl_table *t;
2573 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2574 
2575 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2576 	if (!t)
2577 		goto out;
2578 
2579 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2580 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2581 		t->devinet_vars[i].extra1 = p;
2582 		t->devinet_vars[i].extra2 = net;
2583 	}
2584 
2585 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2586 
2587 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2588 	if (!t->sysctl_header)
2589 		goto free;
2590 
2591 	p->sysctl = t;
2592 
2593 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2594 				    ifindex, p);
2595 	return 0;
2596 
2597 free:
2598 	kfree(t);
2599 out:
2600 	return -ENOMEM;
2601 }
2602 
2603 static void __devinet_sysctl_unregister(struct net *net,
2604 					struct ipv4_devconf *cnf, int ifindex)
2605 {
2606 	struct devinet_sysctl_table *t = cnf->sysctl;
2607 
2608 	if (t) {
2609 		cnf->sysctl = NULL;
2610 		unregister_net_sysctl_table(t->sysctl_header);
2611 		kfree(t);
2612 	}
2613 
2614 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2615 }
2616 
2617 static int devinet_sysctl_register(struct in_device *idev)
2618 {
2619 	int err;
2620 
2621 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2622 		return -EINVAL;
2623 
2624 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2625 	if (err)
2626 		return err;
2627 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2628 					idev->dev->ifindex, &idev->cnf);
2629 	if (err)
2630 		neigh_sysctl_unregister(idev->arp_parms);
2631 	return err;
2632 }
2633 
2634 static void devinet_sysctl_unregister(struct in_device *idev)
2635 {
2636 	struct net *net = dev_net(idev->dev);
2637 
2638 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2639 	neigh_sysctl_unregister(idev->arp_parms);
2640 }
2641 
2642 static struct ctl_table ctl_forward_entry[] = {
2643 	{
2644 		.procname	= "ip_forward",
2645 		.data		= &ipv4_devconf.data[
2646 					IPV4_DEVCONF_FORWARDING - 1],
2647 		.maxlen		= sizeof(int),
2648 		.mode		= 0644,
2649 		.proc_handler	= devinet_sysctl_forward,
2650 		.extra1		= &ipv4_devconf,
2651 		.extra2		= &init_net,
2652 	},
2653 	{ },
2654 };
2655 #endif
2656 
2657 static __net_init int devinet_init_net(struct net *net)
2658 {
2659 	int err;
2660 	struct ipv4_devconf *all, *dflt;
2661 #ifdef CONFIG_SYSCTL
2662 	struct ctl_table *tbl;
2663 	struct ctl_table_header *forw_hdr;
2664 #endif
2665 
2666 	err = -ENOMEM;
2667 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2668 	if (!all)
2669 		goto err_alloc_all;
2670 
2671 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2672 	if (!dflt)
2673 		goto err_alloc_dflt;
2674 
2675 #ifdef CONFIG_SYSCTL
2676 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2677 	if (!tbl)
2678 		goto err_alloc_ctl;
2679 
2680 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2681 	tbl[0].extra1 = all;
2682 	tbl[0].extra2 = net;
2683 #endif
2684 
2685 	if (!net_eq(net, &init_net)) {
2686 		switch (net_inherit_devconf()) {
2687 		case 3:
2688 			/* copy from the current netns */
2689 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2690 			       sizeof(ipv4_devconf));
2691 			memcpy(dflt,
2692 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2693 			       sizeof(ipv4_devconf_dflt));
2694 			break;
2695 		case 0:
2696 		case 1:
2697 			/* copy from init_net */
2698 			memcpy(all, init_net.ipv4.devconf_all,
2699 			       sizeof(ipv4_devconf));
2700 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2701 			       sizeof(ipv4_devconf_dflt));
2702 			break;
2703 		case 2:
2704 			/* use compiled values */
2705 			break;
2706 		}
2707 	}
2708 
2709 #ifdef CONFIG_SYSCTL
2710 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2711 	if (err < 0)
2712 		goto err_reg_all;
2713 
2714 	err = __devinet_sysctl_register(net, "default",
2715 					NETCONFA_IFINDEX_DEFAULT, dflt);
2716 	if (err < 0)
2717 		goto err_reg_dflt;
2718 
2719 	err = -ENOMEM;
2720 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2721 	if (!forw_hdr)
2722 		goto err_reg_ctl;
2723 	net->ipv4.forw_hdr = forw_hdr;
2724 #endif
2725 
2726 	net->ipv4.devconf_all = all;
2727 	net->ipv4.devconf_dflt = dflt;
2728 	return 0;
2729 
2730 #ifdef CONFIG_SYSCTL
2731 err_reg_ctl:
2732 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2733 err_reg_dflt:
2734 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2735 err_reg_all:
2736 	kfree(tbl);
2737 err_alloc_ctl:
2738 #endif
2739 	kfree(dflt);
2740 err_alloc_dflt:
2741 	kfree(all);
2742 err_alloc_all:
2743 	return err;
2744 }
2745 
2746 static __net_exit void devinet_exit_net(struct net *net)
2747 {
2748 #ifdef CONFIG_SYSCTL
2749 	struct ctl_table *tbl;
2750 
2751 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2752 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2753 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2754 				    NETCONFA_IFINDEX_DEFAULT);
2755 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2756 				    NETCONFA_IFINDEX_ALL);
2757 	kfree(tbl);
2758 #endif
2759 	kfree(net->ipv4.devconf_dflt);
2760 	kfree(net->ipv4.devconf_all);
2761 }
2762 
2763 static __net_initdata struct pernet_operations devinet_ops = {
2764 	.init = devinet_init_net,
2765 	.exit = devinet_exit_net,
2766 };
2767 
2768 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2769 	.family		  = AF_INET,
2770 	.fill_link_af	  = inet_fill_link_af,
2771 	.get_link_af_size = inet_get_link_af_size,
2772 	.validate_link_af = inet_validate_link_af,
2773 	.set_link_af	  = inet_set_link_af,
2774 };
2775 
2776 void __init devinet_init(void)
2777 {
2778 	int i;
2779 
2780 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2781 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2782 
2783 	register_pernet_subsys(&devinet_ops);
2784 	register_netdevice_notifier(&ip_netdev_notifier);
2785 
2786 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2787 
2788 	rtnl_af_register(&inet_af_ops);
2789 
2790 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2791 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2792 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2793 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2794 		      inet_netconf_dump_devconf, 0);
2795 }
2796