xref: /openbmc/linux/net/ipv4/devinet.c (revision d4c4653b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/*
 * Statically initialised IPv4 device configuration block.
 *
 * Each option lives at index (IPV4_DEVCONF_<NAME> - 1) of .data; options not
 * listed here start out as zero.  Compared with ipv4_devconf_dflt this table
 * leaves ACCEPT_SOURCE_ROUTE at zero.
 * NOTE(review): where this instance is attached (e.g. a per-netns "all"
 * configuration) is not visible in this part of the file -- confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
81 
/*
 * Statically initialised default IPv4 device configuration.
 *
 * Same layout as ipv4_devconf (option stored at IPV4_DEVCONF_<NAME> - 1), but
 * additionally enables ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt, which
 * inetdev_init() copies into every new in_device -- confirm where it is
 * attached, that code is not visible here.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
94 
/*
 * Access option @attr in the per-namespace default device configuration
 * (net->ipv4.devconf_dflt).  @net is parenthesized so that any pointer
 * expression (e.g. "nets + 1" or "cond ? a : b") expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97 
/*
 * Netlink attribute policy for IPv4 address messages; passed to
 * nlmsg_parse_deprecated() by the RTM_NEWADDR / RTM_DELADDR handlers in this
 * file.  IFA_LABEL is limited to IFNAMSIZ - 1 bytes and IFA_CACHEINFO is
 * sized after struct ifa_cacheinfo.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFA_PROTO]		= { .type = NLA_U8 },
};
109 
/*
 * Bundle of parameters describing one address netlink message to build.
 * NOTE(review): the code consuming this structure is not visible in this
 * part of the file; the field notes below are inferred from the names and
 * should be confirmed against the fill/dump routines.
 */
struct inet_fill_args {
	u32 portid;		/* presumably the netlink port id of the receiver */
	u32 seq;		/* presumably the netlink sequence number */
	int event;		/* presumably the RTM_* message type */
	unsigned int flags;	/* presumably NLM_F_* message flags */
	int netnsid;		/* presumably a target netns id (cf. IFA_TARGET_NETNSID) */
	int ifindex;		/* presumably an interface index */
};
118 
/* The local-address hash table has 2^IN4_ADDR_HSIZE_SHIFT (256) buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries linked through their ->hash node and keyed
 * by inet_addr_hash(net, ifa_local).  Written under RTNL with the _rcu list
 * helpers and walked by readers under rcu_read_lock().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
/* Address netlink notification helper; defined later in this translation unit. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * inetaddr_chain: called in this file with NETDEV_UP / NETDEV_DOWN and the
 * affected in_ifaddr when an address is added, removed or promoted.
 * inetaddr_validator_chain: called with NETDEV_UP and an in_validator_info
 * before a new address is committed (see __inet_insert_ifa()).
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
/*
 * Per-device sysctl registration hooks.  Without CONFIG_SYSCTL they collapse
 * to no-op stubs, with registration always reporting success (0).
 */
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
/*
 * __inet_del_ifa - unlink one address from a device and announce the removal.
 * @in_dev:  device owning the address list
 * @ifap:    the list link (list head or a predecessor's ifa_next) that
 *           currently points at the address to delete
 * @destroy: when non-zero the deleted address is released via inet_free_ifa()
 * @nlh:     forwarded unchanged to rtmsg_ifa()
 * @portid:  forwarded unchanged to rtmsg_ifa()
 *
 * Caller must hold RTNL.  When a primary address is deleted, its secondaries
 * in the same subnet are either deleted too or, if secondary promotion is
 * enabled on the device, the first of them is promoted to primary.  For a
 * device already marked dead only the unlink/announce steps are performed.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;	/* secondary chosen for promotion */
	struct in_ifaddr *ifa, *ifa1;		/* ifa1: the address being deleted */
	struct in_ifaddr __rcu **last_prim;	/* link where a promoted address is re-inserted */
	struct in_ifaddr *prev_prom = NULL;	/* last entry skipped before @promote */
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = ifap;
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the link after the last primary whose scope
			 * is not narrower than ifa1's.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = &ifa->ifa_next;

			/* Skip entries that are not secondaries of ifa1's subnet. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: drop this secondary right away. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Move @promote from its current position to the
			 * slot recorded in last_prim.
			 */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			last_sec = rtnl_dereference(*last_prim);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(*last_prim, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
464 
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work item that runs check_lifetime().  It is (re)queued whenever
 * an address is inserted or its lifetime is updated, and check_lifetime()
 * re-arms it at the end of every run.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
/*
 * __inet_insert_ifa - link a prepared address into its device's list.
 * @ifa:    address to insert; ifa->ifa_dev must already be set
 * @nlh:    forwarded unchanged to rtmsg_ifa()
 * @portid: forwarded unchanged to rtmsg_ifa()
 * @extack: netlink extended ack used for error messages and validators
 *
 * Caller must hold RTNL.  This function takes ownership of @ifa: on every
 * early-exit path, including the "no local address" success case, @ifa is
 * released with inet_free_ifa().
 *
 * Returns 0 on success (or when @ifa has no local address), -EEXIST if the
 * same local address already exists in the same subnet, -EINVAL on a scope
 * mismatch within the subnet, or the errno produced by a validator.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid, struct netlink_ext_ack *extack)
{
	struct in_ifaddr __rcu **last_primary, **ifap;
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_validator_info ivi;
	struct in_ifaddr *ifa1;
	int ret;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start as primary; demoted below if the subnet already has one. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	/* Don't set IPv6 only flags to IPv4 addresses */
	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;

	ifap = &in_dev->ifa_list;
	ifa1 = rtnl_dereference(*ifap);

	/* Walk the whole list: remember where a primary would be inserted
	 * and detect duplicates / an existing address in the same subnet.
	 * After the loop, ifap points at the tail link.
	 */
	while (ifa1) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}

		ifap = &ifa1->ifa_next;
		ifa1 = rtnl_dereference(*ifap);
	}

	/* Allow any devices that wish to register ifaddr validators to weigh
	 * in now, before changes are committed.  The rtnl lock is serializing
	 * access here, so the state should not change between a validator call
	 * and a final notify on commit.  This isn't invoked on promotion under
	 * the assumption that validators are checking the address itself, and
	 * not the flags.
	 */
	ivi.ivi_addr = ifa->ifa_address;
	ivi.ivi_dev = ifa->ifa_dev;
	ivi.extack = extack;
	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
					   NETDEV_UP, &ivi);
	ret = notifier_to_errno(ret);
	if (ret) {
		inet_free_ifa(ifa);
		return ret;
	}

	/* Primaries go after the last suitable primary, secondaries at the tail. */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
		ifap = last_primary;

	rcu_assign_pointer(ifa->ifa_next, *ifap);
	rcu_assign_pointer(*ifap, ifa);

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Re-run the lifetime check immediately so the new address is accounted for. */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
560 
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565 
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	ipv4_devconf_setall(in_dev);
573 	neigh_parms_data_state_setall(in_dev->arp_parms);
574 	if (ifa->ifa_dev != in_dev) {
575 		WARN_ON(ifa->ifa_dev);
576 		in_dev_hold(in_dev);
577 		ifa->ifa_dev = in_dev;
578 	}
579 	if (ipv4_is_loopback(ifa->ifa_local))
580 		ifa->ifa_scope = RT_SCOPE_HOST;
581 	return inet_insert_ifa(ifa);
582 }
583 
584 /* Caller must hold RCU or RTNL :
585  * We dont take a reference on found in_device
586  */
587 struct in_device *inetdev_by_index(struct net *net, int ifindex)
588 {
589 	struct net_device *dev;
590 	struct in_device *in_dev = NULL;
591 
592 	rcu_read_lock();
593 	dev = dev_get_by_index_rcu(net, ifindex);
594 	if (dev)
595 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
596 	rcu_read_unlock();
597 	return in_dev;
598 }
599 EXPORT_SYMBOL(inetdev_by_index);
600 
601 /* Called only from RTNL semaphored context. No locks. */
602 
603 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
604 				    __be32 mask)
605 {
606 	struct in_ifaddr *ifa;
607 
608 	ASSERT_RTNL();
609 
610 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
611 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
612 			return ifa;
613 	}
614 	return NULL;
615 }
616 
617 static int ip_mc_autojoin_config(struct net *net, bool join,
618 				 const struct in_ifaddr *ifa)
619 {
620 #if defined(CONFIG_IP_MULTICAST)
621 	struct ip_mreqn mreq = {
622 		.imr_multiaddr.s_addr = ifa->ifa_address,
623 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
624 	};
625 	struct sock *sk = net->ipv4.mc_autojoin_sk;
626 	int ret;
627 
628 	ASSERT_RTNL();
629 
630 	lock_sock(sk);
631 	if (join)
632 		ret = ip_mc_join_group(sk, &mreq);
633 	else
634 		ret = ip_mc_leave_group(sk, &mreq);
635 	release_sock(sk);
636 
637 	return ret;
638 #else
639 	return -EOPNOTSUPP;
640 #endif
641 }
642 
643 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
644 			    struct netlink_ext_ack *extack)
645 {
646 	struct net *net = sock_net(skb->sk);
647 	struct in_ifaddr __rcu **ifap;
648 	struct nlattr *tb[IFA_MAX+1];
649 	struct in_device *in_dev;
650 	struct ifaddrmsg *ifm;
651 	struct in_ifaddr *ifa;
652 	int err;
653 
654 	ASSERT_RTNL();
655 
656 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
657 				     ifa_ipv4_policy, extack);
658 	if (err < 0)
659 		goto errout;
660 
661 	ifm = nlmsg_data(nlh);
662 	in_dev = inetdev_by_index(net, ifm->ifa_index);
663 	if (!in_dev) {
664 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
665 		err = -ENODEV;
666 		goto errout;
667 	}
668 
669 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
670 	     ifap = &ifa->ifa_next) {
671 		if (tb[IFA_LOCAL] &&
672 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
673 			continue;
674 
675 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
676 			continue;
677 
678 		if (tb[IFA_ADDRESS] &&
679 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
680 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
681 			continue;
682 
683 		if (ipv4_is_multicast(ifa->ifa_address))
684 			ip_mc_autojoin_config(net, false, ifa);
685 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
686 		return 0;
687 	}
688 
689 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
690 	err = -EADDRNOTAVAIL;
691 errout:
692 	return err;
693 }
694 
695 #define INFINITY_LIFE_TIME	0xFFFFFFFF
696 
/*
 * Delayed-work handler enforcing address lifetimes.
 *
 * For every bucket of the address hash table it first scans the entries
 * under rcu_read_lock() to decide whether anything has to change and to
 * compute the next wake-up time; only when a change is needed does it take
 * RTNL and walk the bucket again to delete expired addresses (valid lifetime
 * elapsed) or flag addresses as deprecated (preferred lifetime elapsed).
 * Addresses flagged IFA_F_PERMANENT are ignored.  The work item re-arms
 * itself before returning.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Default: run again after ADDR_CHECK_FREQUENCY unless something expires sooner. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1 (RCU only): detect pending work and pull "next" earlier. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* Valid lifetime elapsed: needs deletion. */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* Preferred lifetime elapsed: next event is expiry. */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* Still preferred: next event is deprecation. */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2 (RTNL): re-evaluate each entry and apply the changes. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* inet_del_ifa() needs the list link pointing
				 * at ifa, so locate it on the device list.
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
					if (tmp == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
798 
799 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
800 			     __u32 prefered_lft)
801 {
802 	unsigned long timeout;
803 
804 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
805 
806 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
807 	if (addrconf_finite_timeout(timeout))
808 		ifa->ifa_valid_lft = timeout;
809 	else
810 		ifa->ifa_flags |= IFA_F_PERMANENT;
811 
812 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
813 	if (addrconf_finite_timeout(timeout)) {
814 		if (timeout == 0)
815 			ifa->ifa_flags |= IFA_F_DEPRECATED;
816 		ifa->ifa_preferred_lft = timeout;
817 	}
818 	ifa->ifa_tstamp = jiffies;
819 	if (!ifa->ifa_cstamp)
820 		ifa->ifa_cstamp = ifa->ifa_tstamp;
821 }
822 
/*
 * rtm_to_ifaddr - build an in_ifaddr from an address netlink request.
 * @net:           namespace in which the interface index is resolved
 * @nlh:           the netlink message (struct ifaddrmsg plus IFA_* attributes)
 * @pvalid_lft:    written with the valid lifetime if IFA_CACHEINFO is present,
 *                 left untouched otherwise
 * @pprefered_lft: written with the preferred lifetime if IFA_CACHEINFO is
 *                 present, left untouched otherwise
 * @extack:        netlink extended ack for error messages
 *
 * On success the returned address holds a reference on the device's
 * in_device (stored in ifa_dev) but is not yet linked anywhere.
 *
 * Returns the new address or an ERR_PTR(): the parse error, -EINVAL (bad
 * prefix length, missing IFA_LOCAL or invalid lifetimes), -ENODEV (unknown
 * interface) or -ENOBUFS (no in_device on the interface, or allocation
 * failure).
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft,
				       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	/* err is pre-loaded before each group of checks that share it. */
	err = -EINVAL;

	if (ifm->ifa_prefixlen > 32) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
		goto errout;
	}

	if (!tb[IFA_LOCAL]) {
		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
		goto errout;
	}

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev) {
		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
		goto errout;
	}

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference owned by ifa->ifa_dev; dropped when the address is freed. */
	in_dev_hold(in_dev);

	/* Without an explicit IFA_ADDRESS, it defaults to the local address. */
	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* The IFA_FLAGS attribute, when present, overrides the header flags. */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	/* The label defaults to the device name. */
	if (tb[IFA_LABEL])
		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_RT_PRIORITY])
		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);

	if (tb[IFA_PROTO])
		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Reject a zero valid lifetime or preferred > valid. */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	/* Also drops the in_device reference taken above (via ifa_dev). */
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
924 
925 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
926 {
927 	struct in_device *in_dev = ifa->ifa_dev;
928 	struct in_ifaddr *ifa1;
929 
930 	if (!ifa->ifa_local)
931 		return NULL;
932 
933 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
934 		if (ifa1->ifa_mask == ifa->ifa_mask &&
935 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
936 		    ifa1->ifa_local == ifa->ifa_local)
937 			return ifa1;
938 	}
939 	return NULL;
940 }
941 
942 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
943 			    struct netlink_ext_ack *extack)
944 {
945 	struct net *net = sock_net(skb->sk);
946 	struct in_ifaddr *ifa;
947 	struct in_ifaddr *ifa_existing;
948 	__u32 valid_lft = INFINITY_LIFE_TIME;
949 	__u32 prefered_lft = INFINITY_LIFE_TIME;
950 
951 	ASSERT_RTNL();
952 
953 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
954 	if (IS_ERR(ifa))
955 		return PTR_ERR(ifa);
956 
957 	ifa_existing = find_matching_ifa(ifa);
958 	if (!ifa_existing) {
959 		/* It would be best to check for !NLM_F_CREATE here but
960 		 * userspace already relies on not having to provide this.
961 		 */
962 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
963 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
964 			int ret = ip_mc_autojoin_config(net, true, ifa);
965 
966 			if (ret < 0) {
967 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
968 				inet_free_ifa(ifa);
969 				return ret;
970 			}
971 		}
972 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
973 					 extack);
974 	} else {
975 		u32 new_metric = ifa->ifa_rt_priority;
976 		u8 new_proto = ifa->ifa_proto;
977 
978 		inet_free_ifa(ifa);
979 
980 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
981 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
982 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
983 			return -EEXIST;
984 		}
985 		ifa = ifa_existing;
986 
987 		if (ifa->ifa_rt_priority != new_metric) {
988 			fib_modify_prefix_metric(ifa, new_metric);
989 			ifa->ifa_rt_priority = new_metric;
990 		}
991 
992 		ifa->ifa_proto = new_proto;
993 
994 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
995 		cancel_delayed_work(&check_lifetime_work);
996 		queue_delayed_work(system_power_efficient_wq,
997 				&check_lifetime_work, 0);
998 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
999 	}
1000 	return 0;
1001 }
1002 
1003 /*
1004  *	Determine a default network mask, based on the IP address.
1005  */
1006 
1007 static int inet_abc_len(__be32 addr)
1008 {
1009 	int rc = -1;	/* Something else, probably a multicast. */
1010 
1011 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1012 		rc = 0;
1013 	else {
1014 		__u32 haddr = ntohl(addr);
1015 		if (IN_CLASSA(haddr))
1016 			rc = 8;
1017 		else if (IN_CLASSB(haddr))
1018 			rc = 16;
1019 		else if (IN_CLASSC(haddr))
1020 			rc = 24;
1021 		else if (IN_CLASSE(haddr))
1022 			rc = 32;
1023 	}
1024 
1025 	return rc;
1026 }
1027 
1028 
/*
 * devinet_ioctl - handle the classic IPv4 per-interface address ioctls.
 * @net: network namespace used for the capability check and device lookup
 * @cmd: SIOC{G,S}IFADDR, SIOC{G,S}IFBRDADDR, SIOC{G,S}IFDSTADDR,
 *       SIOC{G,S}IFNETMASK or SIOCSIFFLAGS
 * @ifr: request block; ifr_name selects the device (optionally in
 *       "dev:alias" form) and ifr_addr carries the address in and/or out
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL        unknown @cmd, non-AF_INET family on a set command, or an
 *                  address/mask rejected by inet_abc_len()/bad_mask()
 *   -EPERM         set command without CAP_NET_ADMIN in net->user_ns
 *   -ENODEV        no device named ifr_name
 *   -EADDRNOTAVAIL no matching address entry (where one is required)
 *   -ENOBUFS       SIOCSIFADDR could not obtain a new address entry
 * or whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
	struct in_ifaddr __rcu **ifap = NULL;
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/* force NUL termination of the supplied interface name */
	ifr->ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut the name at ':' so that dev_load() and
	 * __dev_get_by_name() below see only the device part.  The ':' is
	 * put back once the device has been found (it is not restored on the
	 * -ENODEV path).
	 */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr->ifr_name);

	/* First pass over cmd: validation and permission checks, done
	 * before rtnl_lock() is taken.
	 */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		/* Only attempt the label+address match if the caller passed
		 * an AF_INET address; the reply buffer is then reset.
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (!dev)
		goto done;

	/* restore the full "dev:alias" label for the comparisons below */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */

			/* ifap trails the walk so that, on a match, it points
			 * at the link that references ifa (it is later handed
			 * to inet_del_ifa()).
			 */
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Every command except SIOCSIFADDR and SIOCSIFFLAGS needs an
	 * existing address entry to operate on.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass over cmd: perform the operation under RTNL. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_local;
		break;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		break;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_address;
		break;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_mask;
		break;

	case SIOCSIFFLAGS:
		/* With a "dev:alias" name the flags apply to the alias
		 * address only: clearing IFF_UP deletes that address, any
		 * other flag value is accepted and ignored.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr->ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* no entry with this label yet: create one */
			ret = -ENOBUFS;
			if (!in_dev)
				break;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* existing entry: nothing to do if the address is
			 * unchanged; otherwise it is handed to inet_del_ifa()
			 * with a final argument of 0 and reused below.
			 * NOTE(review): presumably 0 means "do not free" --
			 * confirm against inet_del_ifa().
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Derive prefix length, mask and (where applicable) the
		 * broadcast address from the address itself; point-to-point
		 * devices get a /32.
		 */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		/* remove, modify and re-insert only when the value changes */
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}
1268 
1269 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1270 {
1271 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1272 	const struct in_ifaddr *ifa;
1273 	struct ifreq ifr;
1274 	int done = 0;
1275 
1276 	if (WARN_ON(size > sizeof(struct ifreq)))
1277 		goto out;
1278 
1279 	if (!in_dev)
1280 		goto out;
1281 
1282 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1283 		if (!buf) {
1284 			done += size;
1285 			continue;
1286 		}
1287 		if (len < size)
1288 			break;
1289 		memset(&ifr, 0, sizeof(struct ifreq));
1290 		strcpy(ifr.ifr_name, ifa->ifa_label);
1291 
1292 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1293 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1294 								ifa->ifa_local;
1295 
1296 		if (copy_to_user(buf + done, &ifr, size)) {
1297 			done = -EFAULT;
1298 			break;
1299 		}
1300 		len  -= size;
1301 		done += size;
1302 	}
1303 out:
1304 	return done;
1305 }
1306 
1307 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1308 				 int scope)
1309 {
1310 	const struct in_ifaddr *ifa;
1311 
1312 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1313 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1314 			continue;
1315 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1316 		    ifa->ifa_scope <= scope)
1317 			return ifa->ifa_local;
1318 	}
1319 
1320 	return 0;
1321 }
1322 
/*
 * inet_select_addr - choose a local IPv4 address, starting with @dev.
 * @dev:   device whose own addresses are examined first
 * @dst:   if non-zero, an address whose subnet matches @dst
 *         (per inet_ifa_match()) is taken immediately
 * @scope: largest acceptable scope value
 *
 * The whole lookup runs inside rcu_read_lock().  If @dev yields nothing,
 * the l3mdev master of @dev (if any) is tried, then every device in the
 * namespace that shares the same master index.
 *
 * Returns the selected address, or 0 if none was found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	unsigned char localnet_scope = RT_SCOPE_HOST;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* with route_localnet enabled, cap the effective scope at LINK */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (min(ifa->ifa_scope, localnet_scope) > scope)
			continue;
		/* exact hit (or no dst constraint): take it and stop */
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* otherwise remember the first acceptable address as a
		 * fallback and keep looking for a subnet match
		 */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	/* note: dev is reused as the iteration cursor from here on */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1392 
/*
 * confirm_addr_indev - per-device worker for inet_confirm_addr().
 * @in_dev: device whose address list is scanned
 * @dst:    if non-zero, an entry's subnet must match it
 * @local:  if non-zero, the address to confirm; 0 means pick one
 * @scope:  largest acceptable scope value for the returned address
 *
 * Two things are tracked while walking the list:
 *   addr - a candidate local address: the first entry whose ifa_local
 *          equals @local (or any entry when @local is 0) and whose
 *          effective scope is <= @scope;
 *   same - whether some entry's subnet matches both @local and @dst
 *          (a zero value matches anything).
 *
 * Returns addr if a subnet match was established, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	/* with route_localnet enabled, cap the effective scope at LINK */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* pick the candidate address once; if the subnet match was
		 * already found on an earlier entry we are done
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* only the "autoselect local, dst given"
				 * case needs the extra checks below
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1436 
1437 /*
1438  * Confirm that local IP address exists using wildcards:
1439  * - net: netns to check, cannot be NULL
1440  * - in_dev: only on this interface, NULL=any interface
1441  * - dst: only in the same subnet as dst, 0=any dst
1442  * - local: address, 0=autoselect the local address
1443  * - scope: maximum allowed scope value for the local address
1444  */
1445 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1446 			 __be32 dst, __be32 local, int scope)
1447 {
1448 	__be32 addr = 0;
1449 	struct net_device *dev;
1450 
1451 	if (in_dev)
1452 		return confirm_addr_indev(in_dev, dst, local, scope);
1453 
1454 	rcu_read_lock();
1455 	for_each_netdev_rcu(net, dev) {
1456 		in_dev = __in_dev_get_rcu(dev);
1457 		if (in_dev) {
1458 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1459 			if (addr)
1460 				break;
1461 		}
1462 	}
1463 	rcu_read_unlock();
1464 
1465 	return addr;
1466 }
1467 EXPORT_SYMBOL(inet_confirm_addr);
1468 
1469 /*
1470  *	Device notifier
1471  */
1472 
/*
 * Add @nb to inetaddr_chain.  Returns whatever
 * blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1478 
/*
 * Remove @nb from inetaddr_chain.  Returns whatever
 * blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1484 
/*
 * Add @nb to inetaddr_validator_chain.  Returns whatever
 * blocking_notifier_chain_register() returns.
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1490 
/*
 * Remove @nb from inetaddr_validator_chain.  Returns whatever
 * blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
	    nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1497 
1498 /* Rename ifa_labels for a device name change. Make some effort to preserve
1499  * existing alias numbering and to create unique labels if possible.
1500 */
1501 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1502 {
1503 	struct in_ifaddr *ifa;
1504 	int named = 0;
1505 
1506 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1507 		char old[IFNAMSIZ], *dot;
1508 
1509 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1510 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1511 		if (named++ == 0)
1512 			goto skip;
1513 		dot = strchr(old, ':');
1514 		if (!dot) {
1515 			sprintf(old, ":%d", named);
1516 			dot = old;
1517 		}
1518 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1519 			strcat(ifa->ifa_label, dot);
1520 		else
1521 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1522 skip:
1523 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1524 	}
1525 }
1526 
1527 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1528 					struct in_device *in_dev)
1529 
1530 {
1531 	const struct in_ifaddr *ifa;
1532 
1533 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1534 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1535 			 ifa->ifa_local, dev,
1536 			 ifa->ifa_local, NULL,
1537 			 dev->dev_addr, NULL);
1538 	}
1539 }
1540 
/* Called only under RTNL semaphore */

/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Creates, updates or tears down the device's in_device in response to
 * device events.  Returns NOTIFY_DONE, except when inetdev_init() fails
 * during NETDEV_REGISTER, in which case the error is returned through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No in_device yet: only REGISTER and CHANGEMTU can create one;
	 * every other event is ignored.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			/* note: the result of inetdev_init() is not checked
			 * on this path
			 */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER with an in_device already attached is unexpected */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* loopback gets INADDR_LOOPBACK/8 with host scope installed
		 * automatically when it comes up
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR send gratuitous ARP only if arp_notify
		 * is enabled; NOTIFY_PEERS always does
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() in this file does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) for each address, so the
		 * comment above looks stale -- confirm intent.
		 */
		inetdev_changename(dev, in_dev);

		/* re-register so the sysctl entries follow the new name */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1636 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1640 
1641 static size_t inet_nlmsg_size(void)
1642 {
1643 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1644 	       + nla_total_size(4) /* IFA_ADDRESS */
1645 	       + nla_total_size(4) /* IFA_LOCAL */
1646 	       + nla_total_size(4) /* IFA_BROADCAST */
1647 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1648 	       + nla_total_size(4)  /* IFA_FLAGS */
1649 	       + nla_total_size(1)  /* IFA_PROTO */
1650 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1651 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1652 }
1653 
1654 static inline u32 cstamp_delta(unsigned long cstamp)
1655 {
1656 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1657 }
1658 
1659 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1660 			 unsigned long tstamp, u32 preferred, u32 valid)
1661 {
1662 	struct ifa_cacheinfo ci;
1663 
1664 	ci.cstamp = cstamp_delta(cstamp);
1665 	ci.tstamp = cstamp_delta(tstamp);
1666 	ci.ifa_prefered = preferred;
1667 	ci.ifa_valid = valid;
1668 
1669 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1670 }
1671 
/*
 * inet_fill_ifaddr - append one address message describing @ifa to @skb.
 * @skb:  message buffer being built
 * @ifa:  address entry to describe
 * @args: portid/seq/event/flags for the netlink header, plus netnsid
 *        (emitted as IFA_TARGET_NETNSID when >= 0)
 *
 * Returns 0 on success.  If the header or any attribute does not fit, the
 * partially built message is cancelled and -EMSGSIZE is returned.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    struct inet_fill_args *args)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
			args->flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (args->netnsid >= 0 &&
	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
		goto nla_put_failure;

	/* Non-permanent addresses report their remaining lifetimes: the
	 * seconds elapsed since ifa_tstamp are subtracted, clamping at 0.
	 * Note that 'valid' is only adjusted when 'preferred' is finite.
	 */
	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Attributes whose value is zero/empty are omitted; IFA_FLAGS and
	 * the cache info are always emitted.  The chain stops at the first
	 * put that fails.
	 */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    (ifa->ifa_proto &&
	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    (ifa->ifa_rt_priority &&
	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1740 
1741 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1742 				      struct inet_fill_args *fillargs,
1743 				      struct net **tgt_net, struct sock *sk,
1744 				      struct netlink_callback *cb)
1745 {
1746 	struct netlink_ext_ack *extack = cb->extack;
1747 	struct nlattr *tb[IFA_MAX+1];
1748 	struct ifaddrmsg *ifm;
1749 	int err, i;
1750 
1751 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1752 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1753 		return -EINVAL;
1754 	}
1755 
1756 	ifm = nlmsg_data(nlh);
1757 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1758 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1759 		return -EINVAL;
1760 	}
1761 
1762 	fillargs->ifindex = ifm->ifa_index;
1763 	if (fillargs->ifindex) {
1764 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1765 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1766 	}
1767 
1768 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1769 					    ifa_ipv4_policy, extack);
1770 	if (err < 0)
1771 		return err;
1772 
1773 	for (i = 0; i <= IFA_MAX; ++i) {
1774 		if (!tb[i])
1775 			continue;
1776 
1777 		if (i == IFA_TARGET_NETNSID) {
1778 			struct net *net;
1779 
1780 			fillargs->netnsid = nla_get_s32(tb[i]);
1781 
1782 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1783 			if (IS_ERR(net)) {
1784 				fillargs->netnsid = -1;
1785 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1786 				return PTR_ERR(net);
1787 			}
1788 			*tgt_net = net;
1789 		} else {
1790 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1791 			return -EINVAL;
1792 		}
1793 	}
1794 
1795 	return 0;
1796 }
1797 
1798 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1799 			    struct netlink_callback *cb, int s_ip_idx,
1800 			    struct inet_fill_args *fillargs)
1801 {
1802 	struct in_ifaddr *ifa;
1803 	int ip_idx = 0;
1804 	int err;
1805 
1806 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1807 		if (ip_idx < s_ip_idx) {
1808 			ip_idx++;
1809 			continue;
1810 		}
1811 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1812 		if (err < 0)
1813 			goto done;
1814 
1815 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1816 		ip_idx++;
1817 	}
1818 	err = 0;
1819 
1820 done:
1821 	cb->args[2] = ip_idx;
1822 
1823 	return err;
1824 }
1825 
1826 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1827  */
1828 static u32 inet_base_seq(const struct net *net)
1829 {
1830 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1831 		  net->dev_base_seq;
1832 
1833 	/* Must not return 0 (see nl_dump_check_consistent()).
1834 	 * Chose a value far away from 0.
1835 	 */
1836 	if (!res)
1837 		res = 0x80000000;
1838 	return res;
1839 }
1840 
/*
 * inet_dump_ifaddr - netlink dump callback producing RTM_NEWADDR messages.
 * @skb: reply buffer being filled
 * @cb:  dump state; cb->args[] holds the resume cursor:
 *         args[0] device-index hash bucket,
 *         args[1] position of the device within that bucket,
 *         args[2] position of the address within the device
 *                 (written by in_dev_dump_addr())
 *
 * With cb->strict_check the request is validated first; it may then
 * select a target namespace and/or restrict the dump to one ifindex.
 *
 * Returns skb->len if anything was written, otherwise the last error
 * (0 when there was none).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* pick up where the previous invocation stopped */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* filtered dump: a single device, no hash walk; args[0]
		 * and args[1] are left untouched on this path
		 */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* s_idx only applies to the first bucket visited, hence the reset
	 * in the loop increment
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = inet_base_seq(tgt_net);
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* the saved address offset is only valid for the
			 * exact device we stopped on
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/* netnsid >= 0 only if the validator resolved a target namespace */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1923 
1924 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1925 		      u32 portid)
1926 {
1927 	struct inet_fill_args fillargs = {
1928 		.portid = portid,
1929 		.seq = nlh ? nlh->nlmsg_seq : 0,
1930 		.event = event,
1931 		.flags = 0,
1932 		.netnsid = -1,
1933 	};
1934 	struct sk_buff *skb;
1935 	int err = -ENOBUFS;
1936 	struct net *net;
1937 
1938 	net = dev_net(ifa->ifa_dev->dev);
1939 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1940 	if (!skb)
1941 		goto errout;
1942 
1943 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1944 	if (err < 0) {
1945 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1946 		WARN_ON(err == -EMSGSIZE);
1947 		kfree_skb(skb);
1948 		goto errout;
1949 	}
1950 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1951 	return;
1952 errout:
1953 	if (err < 0)
1954 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1955 }
1956 
1957 static size_t inet_get_link_af_size(const struct net_device *dev,
1958 				    u32 ext_filter_mask)
1959 {
1960 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1961 
1962 	if (!in_dev)
1963 		return 0;
1964 
1965 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1966 }
1967 
1968 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1969 			     u32 ext_filter_mask)
1970 {
1971 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1972 	struct nlattr *nla;
1973 	int i;
1974 
1975 	if (!in_dev)
1976 		return -ENODATA;
1977 
1978 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1979 	if (!nla)
1980 		return -EMSGSIZE;
1981 
1982 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1983 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1984 
1985 	return 0;
1986 }
1987 
/*
 * Attribute policy for the IFLA_INET_* attributes; passed to
 * nla_parse_nested_deprecated() by inet_validate_link_af().
 * IFLA_INET_CONF is declared as a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1991 
1992 static int inet_validate_link_af(const struct net_device *dev,
1993 				 const struct nlattr *nla,
1994 				 struct netlink_ext_ack *extack)
1995 {
1996 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1997 	int err, rem;
1998 
1999 	if (dev && !__in_dev_get_rtnl(dev))
2000 		return -EAFNOSUPPORT;
2001 
2002 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2003 					  inet_af_policy, extack);
2004 	if (err < 0)
2005 		return err;
2006 
2007 	if (tb[IFLA_INET_CONF]) {
2008 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2009 			int cfgid = nla_type(a);
2010 
2011 			if (nla_len(a) < 4)
2012 				return -EINVAL;
2013 
2014 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2015 				return -EINVAL;
2016 		}
2017 	}
2018 
2019 	return 0;
2020 }
2021 
2022 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2023 			    struct netlink_ext_ack *extack)
2024 {
2025 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2026 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2027 	int rem;
2028 
2029 	if (!in_dev)
2030 		return -EAFNOSUPPORT;
2031 
2032 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2033 		return -EINVAL;
2034 
2035 	if (tb[IFLA_INET_CONF]) {
2036 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2037 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2038 	}
2039 
2040 	return 0;
2041 }
2042 
2043 static int inet_netconf_msgsize_devconf(int type)
2044 {
2045 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2046 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2047 	bool all = false;
2048 
2049 	if (type == NETCONFA_ALL)
2050 		all = true;
2051 
2052 	if (all || type == NETCONFA_FORWARDING)
2053 		size += nla_total_size(4);
2054 	if (all || type == NETCONFA_RP_FILTER)
2055 		size += nla_total_size(4);
2056 	if (all || type == NETCONFA_MC_FORWARDING)
2057 		size += nla_total_size(4);
2058 	if (all || type == NETCONFA_BC_FORWARDING)
2059 		size += nla_total_size(4);
2060 	if (all || type == NETCONFA_PROXY_NEIGH)
2061 		size += nla_total_size(4);
2062 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2063 		size += nla_total_size(4);
2064 
2065 	return size;
2066 }
2067 
/*
 * inet_netconf_fill_devconf - append one netconf message to @skb.
 * @skb:     message buffer being built
 * @ifindex: value emitted as NETCONFA_IFINDEX
 * @devconf: configuration to report; if NULL only the ifindex is emitted
 * @portid:  netlink header port id
 * @seq:     netlink header sequence number
 * @event:   netlink header message type
 * @flags:   netlink header flags
 * @type:    single NETCONFA_* attribute to report, or NETCONFA_ALL
 *
 * Returns 0 on success.  If the header or an attribute does not fit, the
 * partially built message is cancelled and -EMSGSIZE is returned.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	/* without a devconf the message carries just the ifindex */
	if (!devconf)
		goto out;

	/* each attribute is emitted when it is the one requested or when
	 * everything was requested
	 */
	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_BC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
		goto nla_put_failure;
	/* NETCONFA_PROXY_NEIGH reports the PROXY_ARP setting */
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2127 
2128 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2129 				 int ifindex, struct ipv4_devconf *devconf)
2130 {
2131 	struct sk_buff *skb;
2132 	int err = -ENOBUFS;
2133 
2134 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2135 	if (!skb)
2136 		goto errout;
2137 
2138 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2139 					event, 0, type);
2140 	if (err < 0) {
2141 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2142 		WARN_ON(err == -EMSGSIZE);
2143 		kfree_skb(skb);
2144 		goto errout;
2145 	}
2146 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2147 	return;
2148 errout:
2149 	if (err < 0)
2150 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2151 }
2152 
/*
 * Attribute policy for netconf requests; passed to the nlmsg_parse_*()
 * calls in inet_netconf_valid_get_req().  Every attribute listed declares
 * a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2160 
2161 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2162 				      const struct nlmsghdr *nlh,
2163 				      struct nlattr **tb,
2164 				      struct netlink_ext_ack *extack)
2165 {
2166 	int i, err;
2167 
2168 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2169 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2170 		return -EINVAL;
2171 	}
2172 
2173 	if (!netlink_strict_get_check(skb))
2174 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2175 					      tb, NETCONFA_MAX,
2176 					      devconf_ipv4_policy, extack);
2177 
2178 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2179 					    tb, NETCONFA_MAX,
2180 					    devconf_ipv4_policy, extack);
2181 	if (err)
2182 		return err;
2183 
2184 	for (i = 0; i <= NETCONFA_MAX; i++) {
2185 		if (!tb[i])
2186 			continue;
2187 
2188 		switch (i) {
2189 		case NETCONFA_IFINDEX:
2190 			break;
2191 		default:
2192 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2193 			return -EINVAL;
2194 		}
2195 	}
2196 
2197 	return 0;
2198 }
2199 
2200 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2201 				    struct nlmsghdr *nlh,
2202 				    struct netlink_ext_ack *extack)
2203 {
2204 	struct net *net = sock_net(in_skb->sk);
2205 	struct nlattr *tb[NETCONFA_MAX+1];
2206 	struct sk_buff *skb;
2207 	struct ipv4_devconf *devconf;
2208 	struct in_device *in_dev;
2209 	struct net_device *dev;
2210 	int ifindex;
2211 	int err;
2212 
2213 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2214 	if (err)
2215 		goto errout;
2216 
2217 	err = -EINVAL;
2218 	if (!tb[NETCONFA_IFINDEX])
2219 		goto errout;
2220 
2221 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2222 	switch (ifindex) {
2223 	case NETCONFA_IFINDEX_ALL:
2224 		devconf = net->ipv4.devconf_all;
2225 		break;
2226 	case NETCONFA_IFINDEX_DEFAULT:
2227 		devconf = net->ipv4.devconf_dflt;
2228 		break;
2229 	default:
2230 		dev = __dev_get_by_index(net, ifindex);
2231 		if (!dev)
2232 			goto errout;
2233 		in_dev = __in_dev_get_rtnl(dev);
2234 		if (!in_dev)
2235 			goto errout;
2236 		devconf = &in_dev->cnf;
2237 		break;
2238 	}
2239 
2240 	err = -ENOBUFS;
2241 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2242 	if (!skb)
2243 		goto errout;
2244 
2245 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2246 					NETLINK_CB(in_skb).portid,
2247 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2248 					NETCONFA_ALL);
2249 	if (err < 0) {
2250 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2251 		WARN_ON(err == -EMSGSIZE);
2252 		kfree_skb(skb);
2253 		goto errout;
2254 	}
2255 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2256 errout:
2257 	return err;
2258 }
2259 
/*
 * RTM_GETNETCONF dump handler (registered in devinet_init()).
 *
 * Emits one RTM_NEWNETCONF/NLM_F_MULTI message per device that has an
 * in_device, followed by one message for the namespace's "all" devconf and
 * one for its "default" devconf.
 *
 * Progress is kept in the callback state so the dump can be continued:
 *   cb->args[0] - hash bucket to resume from; NETDEV_HASHENTRIES means the
 *                 "all" entry is next, NETDEV_HASHENTRIES + 1 the "default"
 *                 entry, anything larger means nothing is left.
 *   cb->args[1] - position inside that bucket.
 *
 * With cb->strict_check set, the request must consist of exactly a
 * struct netconfmsg header with no trailing attributes.
 *
 * Returns -EINVAL for a malformed strict request, otherwise skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/*
	 * Walk the device index hash.  s_idx only applies to the first bucket
	 * visited; it is reset to 0 for every following bucket.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = inet_base_seq(net);
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip entries already reported in an earlier pass. */
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				/*
				 * Fill failed: leave with h/idx pointing at
				 * this device so it is the resume position.
				 */
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All buckets done: append the namespace-wide "all" entry. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* ...and finally the "default" entry. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	/* Record the position reached for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2345 
2346 #ifdef CONFIG_SYSCTL
2347 
2348 static void devinet_copy_dflt_conf(struct net *net, int i)
2349 {
2350 	struct net_device *dev;
2351 
2352 	rcu_read_lock();
2353 	for_each_netdev_rcu(net, dev) {
2354 		struct in_device *in_dev;
2355 
2356 		in_dev = __in_dev_get_rcu(dev);
2357 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2358 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2359 	}
2360 	rcu_read_unlock();
2361 }
2362 
2363 /* called with RTNL locked */
2364 static void inet_forward_change(struct net *net)
2365 {
2366 	struct net_device *dev;
2367 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2368 
2369 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2370 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2371 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2372 				    NETCONFA_FORWARDING,
2373 				    NETCONFA_IFINDEX_ALL,
2374 				    net->ipv4.devconf_all);
2375 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2376 				    NETCONFA_FORWARDING,
2377 				    NETCONFA_IFINDEX_DEFAULT,
2378 				    net->ipv4.devconf_dflt);
2379 
2380 	for_each_netdev(net, dev) {
2381 		struct in_device *in_dev;
2382 
2383 		if (on)
2384 			dev_disable_lro(dev);
2385 
2386 		in_dev = __in_dev_get_rtnl(dev);
2387 		if (in_dev) {
2388 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2389 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2390 						    NETCONFA_FORWARDING,
2391 						    dev->ifindex, &in_dev->cnf);
2392 		}
2393 	}
2394 }
2395 
2396 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2397 {
2398 	if (cnf == net->ipv4.devconf_dflt)
2399 		return NETCONFA_IFINDEX_DEFAULT;
2400 	else if (cnf == net->ipv4.devconf_all)
2401 		return NETCONFA_IFINDEX_ALL;
2402 	else {
2403 		struct in_device *idev
2404 			= container_of(cnf, struct in_device, cnf);
2405 		return idev->dev->ifindex;
2406 	}
2407 }
2408 
2409 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2410 			     void *buffer, size_t *lenp, loff_t *ppos)
2411 {
2412 	int old_value = *(int *)ctl->data;
2413 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2414 	int new_value = *(int *)ctl->data;
2415 
2416 	if (write) {
2417 		struct ipv4_devconf *cnf = ctl->extra1;
2418 		struct net *net = ctl->extra2;
2419 		int i = (int *)ctl->data - cnf->data;
2420 		int ifindex;
2421 
2422 		set_bit(i, cnf->state);
2423 
2424 		if (cnf == net->ipv4.devconf_dflt)
2425 			devinet_copy_dflt_conf(net, i);
2426 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2427 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2428 			if ((new_value == 0) && (old_value != 0))
2429 				rt_cache_flush(net);
2430 
2431 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2432 		    new_value != old_value)
2433 			rt_cache_flush(net);
2434 
2435 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2436 		    new_value != old_value) {
2437 			ifindex = devinet_conf_ifindex(net, cnf);
2438 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2439 						    NETCONFA_RP_FILTER,
2440 						    ifindex, cnf);
2441 		}
2442 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2443 		    new_value != old_value) {
2444 			ifindex = devinet_conf_ifindex(net, cnf);
2445 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2446 						    NETCONFA_PROXY_NEIGH,
2447 						    ifindex, cnf);
2448 		}
2449 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2450 		    new_value != old_value) {
2451 			ifindex = devinet_conf_ifindex(net, cnf);
2452 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2453 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2454 						    ifindex, cnf);
2455 		}
2456 	}
2457 
2458 	return ret;
2459 }
2460 
2461 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2462 				  void *buffer, size_t *lenp, loff_t *ppos)
2463 {
2464 	int *valp = ctl->data;
2465 	int val = *valp;
2466 	loff_t pos = *ppos;
2467 	struct net *net = ctl->extra2;
2468 	int ret;
2469 
2470 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2471 		return -EPERM;
2472 
2473 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2474 
2475 	if (write && *valp != val) {
2476 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2477 			if (!rtnl_trylock()) {
2478 				/* Restore the original values before restarting */
2479 				*valp = val;
2480 				*ppos = pos;
2481 				return restart_syscall();
2482 			}
2483 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2484 				inet_forward_change(net);
2485 			} else {
2486 				struct ipv4_devconf *cnf = ctl->extra1;
2487 				struct in_device *idev =
2488 					container_of(cnf, struct in_device, cnf);
2489 				if (*valp)
2490 					dev_disable_lro(idev->dev);
2491 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2492 							    NETCONFA_FORWARDING,
2493 							    idev->dev->ifindex,
2494 							    cnf);
2495 			}
2496 			rtnl_unlock();
2497 			rt_cache_flush(net);
2498 		} else
2499 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2500 						    NETCONFA_FORWARDING,
2501 						    NETCONFA_IFINDEX_DEFAULT,
2502 						    net->ipv4.devconf_dflt);
2503 	}
2504 
2505 	return ret;
2506 }
2507 
2508 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2509 				void *buffer, size_t *lenp, loff_t *ppos)
2510 {
2511 	int *valp = ctl->data;
2512 	int val = *valp;
2513 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2514 	struct net *net = ctl->extra2;
2515 
2516 	if (write && *valp != val)
2517 		rt_cache_flush(net);
2518 
2519 	return ret;
2520 }
2521 
/*
 * Build one ctl_table initializer for devconf attribute IPV4_DEVCONF_<attr>.
 *
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of the global
 * ipv4_devconf.data[] and .extra1 at ipv4_devconf itself; both are
 * re-pointed at the real devconf by __devinet_sysctl_register().
 * @name is the procfs file name, @mval the file mode, @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2544 
/*
 * Template for the per-devconf sysctl directory net/ipv4/conf/<name>/.
 *
 * __devinet_sysctl_register() kmemdup()s this whole structure for each
 * devconf, then fixes up .data/.extra1/.extra2 of every entry except the
 * last array slot, and stores the registration handle in sysctl_header.
 * NOTE(review): the last slot is presumably the empty terminating entry --
 * confirm that the number of initializers is below __IPV4_DEVCONF_MAX.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler (RTNL, LRO, notifications) */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2598 
2599 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2600 				     int ifindex, struct ipv4_devconf *p)
2601 {
2602 	int i;
2603 	struct devinet_sysctl_table *t;
2604 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2605 
2606 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2607 	if (!t)
2608 		goto out;
2609 
2610 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2611 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2612 		t->devinet_vars[i].extra1 = p;
2613 		t->devinet_vars[i].extra2 = net;
2614 	}
2615 
2616 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2617 
2618 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2619 	if (!t->sysctl_header)
2620 		goto free;
2621 
2622 	p->sysctl = t;
2623 
2624 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2625 				    ifindex, p);
2626 	return 0;
2627 
2628 free:
2629 	kfree(t);
2630 out:
2631 	return -ENOMEM;
2632 }
2633 
2634 static void __devinet_sysctl_unregister(struct net *net,
2635 					struct ipv4_devconf *cnf, int ifindex)
2636 {
2637 	struct devinet_sysctl_table *t = cnf->sysctl;
2638 
2639 	if (t) {
2640 		cnf->sysctl = NULL;
2641 		unregister_net_sysctl_table(t->sysctl_header);
2642 		kfree(t);
2643 	}
2644 
2645 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2646 }
2647 
2648 static int devinet_sysctl_register(struct in_device *idev)
2649 {
2650 	int err;
2651 
2652 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2653 		return -EINVAL;
2654 
2655 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2656 	if (err)
2657 		return err;
2658 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2659 					idev->dev->ifindex, &idev->cnf);
2660 	if (err)
2661 		neigh_sysctl_unregister(idev->arp_parms);
2662 	return err;
2663 }
2664 
2665 static void devinet_sysctl_unregister(struct in_device *idev)
2666 {
2667 	struct net *net = dev_net(idev->dev);
2668 
2669 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2670 	neigh_sysctl_unregister(idev->arp_parms);
2671 }
2672 
/*
 * Template for the "ip_forward" sysctl that devinet_init_net() registers
 * under net/ipv4.  devinet_init_net() duplicates this table per namespace
 * and re-points .data/.extra1/.extra2 of entry 0 at that namespace's "all"
 * devconf and struct net; the values below refer to the global
 * ipv4_devconf and init_net.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2686 #endif
2687 
/*
 * Per-namespace init (devinet_ops.init): allocate the namespace's "all" and
 * "default" devconf sets and, with CONFIG_SYSCTL, register their sysctl
 * directories plus the net/ipv4 "ip_forward" entry.
 *
 * Both sets start as copies of the compiled-in ipv4_devconf /
 * ipv4_devconf_dflt.  For namespaces other than init_net they may then be
 * overwritten depending on net_inherit_devconf().
 *
 * Returns 0 on success; on failure everything set up so far is torn down
 * in reverse order and a negative errno is returned.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;
	struct ctl_table_header *forw_hdr;
#endif

	/* Covers all three allocations below. */
	err = -ENOMEM;
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	/* Point the private ip_forward entry at this namespace's "all" set. */
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	if (!net_eq(net, &init_net)) {
		/* Any value not listed below leaves the compiled-in copies. */
		switch (net_inherit_devconf()) {
		case 3:
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 0:
		case 1:
			/* copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 2:
			/* use compiled values */
			break;
		}
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	/* err is 0 here; re-arm it for the registration below. */
	err = -ENOMEM;
	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
					  ARRAY_SIZE(ctl_forward_entry));
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	/* Publish the devconf sets only once everything has succeeded. */
	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Error unwind: each label undoes the step just before the failure. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	return err;
}
2777 
2778 static __net_exit void devinet_exit_net(struct net *net)
2779 {
2780 #ifdef CONFIG_SYSCTL
2781 	struct ctl_table *tbl;
2782 
2783 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2784 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2785 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2786 				    NETCONFA_IFINDEX_DEFAULT);
2787 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2788 				    NETCONFA_IFINDEX_ALL);
2789 	kfree(tbl);
2790 #endif
2791 	kfree(net->ipv4.devconf_dflt);
2792 	kfree(net->ipv4.devconf_all);
2793 }
2794 
/*
 * Per-network-namespace hooks; registered from devinet_init() with
 * register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2799 
/*
 * AF_INET link-attribute callbacks; registered from devinet_init() with
 * rtnl_af_register().  The callbacks themselves are defined outside the
 * part of the file shown here.
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2807 
2808 void __init devinet_init(void)
2809 {
2810 	int i;
2811 
2812 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2813 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2814 
2815 	register_pernet_subsys(&devinet_ops);
2816 	register_netdevice_notifier(&ip_netdev_notifier);
2817 
2818 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2819 
2820 	rtnl_af_register(&inet_af_ops);
2821 
2822 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2823 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2824 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2825 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2826 		      inet_netconf_dump_devconf, 0);
2827 }
2828