xref: /openbmc/linux/net/ipv4/devinet.c (revision a1c7c49c2091926962f8c1c866d386febffec5d8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that only make sense for IPv6. __inet_insert_ifa() masks
 * them out of every IPv4 address before linking it.
 */
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/* Statically initialised devconf table. Slots are indexed by
 * IPV4_DEVCONF_<name> - 1; every slot not listed here starts at zero.
 * NOTE(review): presumably the template for the per-namespace "all"
 * settings - the consumer is outside this chunk, confirm.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
/* Second statically initialised devconf table; compared with ipv4_devconf
 * it additionally presets ACCEPT_SOURCE_ROUTE to 1.
 * NOTE(review): presumably the seed for net->ipv4.devconf_dflt, which
 * inetdev_init() copies into each new in_device - confirm against the
 * namespace init code, which is not visible here.
 */
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/* Access devconf attribute @attr in the default table of namespace @net
 * (net->ipv4.devconf_dflt).
 */
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
/* Netlink attribute policy handed to nlmsg_parse_deprecated() when parsing
 * IPv4 address messages in this file. Addresses are u32 attributes and the
 * label is a string of at most IFNAMSIZ - 1 bytes.
 */
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 };
108 
/* Parameter bundle.
 * NOTE(review): presumably consumed by the netlink address fill/dump code
 * further down the file; no user is visible in this chunk - confirm.
 */
109 struct inet_fill_args {
110 	u32 portid;
111 	u32 seq;
112 	int event;
113 	unsigned int flags;
114 	int netnsid;
115 	int ifindex;
116 };
117 
/* Hash table of configured IPv4 addresses: 1 << 8 = 256 buckets, indexed by
 * inet_addr_hash() and linked through in_ifaddr::hash.
 */
118 #define IN4_ADDR_HSIZE_SHIFT	8
119 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
120 
121 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
122 
123 static u32 inet_addr_hash(const struct net *net, __be32 addr)
124 {
125 	u32 val = (__force u32) addr ^ net_hash_mix(net);
126 
127 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
128 }
129 
130 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
131 {
132 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
133 
134 	ASSERT_RTNL();
135 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
136 }
137 
138 static void inet_hash_remove(struct in_ifaddr *ifa)
139 {
140 	ASSERT_RTNL();
141 	hlist_del_init_rcu(&ifa->hash);
142 }
143 
144 /**
145  * __ip_dev_find - find the first device with a given source address.
146  * @net: the net namespace
147  * @addr: the source address
148  * @devref: if true, take a reference on the found device
149  *
150  * If a caller uses devref=false, it should be protected by RCU, or RTNL
151  */
152 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
153 {
154 	struct net_device *result = NULL;
155 	struct in_ifaddr *ifa;
156 
157 	rcu_read_lock();
158 	ifa = inet_lookup_ifaddr_rcu(net, addr);
159 	if (!ifa) {
160 		struct flowi4 fl4 = { .daddr = addr };
161 		struct fib_result res = { 0 };
162 		struct fib_table *local;
163 
164 		/* Fallback to FIB local table so that communication
165 		 * over loopback subnets work.
166 		 */
167 		local = fib_get_table(net, RT_TABLE_LOCAL);
168 		if (local &&
169 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170 		    res.type == RTN_LOCAL)
171 			result = FIB_RES_DEV(res);
172 	} else {
173 		result = ifa->ifa_dev->dev;
174 	}
175 	if (result && devref)
176 		dev_hold(result);
177 	rcu_read_unlock();
178 	return result;
179 }
180 EXPORT_SYMBOL(__ip_dev_find);
181 
182 /* called under RCU lock */
183 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
184 {
185 	u32 hash = inet_addr_hash(net, addr);
186 	struct in_ifaddr *ifa;
187 
188 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
189 		if (ifa->ifa_local == addr &&
190 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
191 			return ifa;
192 
193 	return NULL;
194 }
195 
/* Forward declaration; called in this file with RTM_NEWADDR / RTM_DELADDR
 * whenever an address is added, changed or removed.
 */
196 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
197 
/* Notified with NETDEV_UP / NETDEV_DOWN and the affected in_ifaddr when an
 * address is linked to or unlinked from a device.
 */
198 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Called with NETDEV_UP and an in_validator_info before a new address is
 * committed; a non-zero errno from the chain aborts the insertion.
 */
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
200 static void inet_del_ifa(struct in_device *in_dev,
201 			 struct in_ifaddr __rcu **ifap,
202 			 int destroy);
/* With CONFIG_SYSCTL the per-device sysctl hooks are only declared here;
 * without it they collapse to the no-op stubs below.
 */
203 #ifdef CONFIG_SYSCTL
204 static int devinet_sysctl_register(struct in_device *idev);
205 static void devinet_sysctl_unregister(struct in_device *idev);
206 #else
207 static int devinet_sysctl_register(struct in_device *idev)
208 {
209 	return 0;
210 }
211 static void devinet_sysctl_unregister(struct in_device *idev)
212 {
213 }
214 #endif
215 
216 /* Locks all the inet devices. */
217 
218 static struct in_ifaddr *inet_alloc_ifa(void)
219 {
220 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
221 }
222 
223 static void inet_rcu_free_ifa(struct rcu_head *head)
224 {
225 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
226 	if (ifa->ifa_dev)
227 		in_dev_put(ifa->ifa_dev);
228 	kfree(ifa);
229 }
230 
231 static void inet_free_ifa(struct in_ifaddr *ifa)
232 {
233 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
234 }
235 
236 void in_dev_finish_destroy(struct in_device *idev)
237 {
238 	struct net_device *dev = idev->dev;
239 
240 	WARN_ON(idev->ifa_list);
241 	WARN_ON(idev->mc_list);
242 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
243 #ifdef NET_REFCNT_DEBUG
244 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
245 #endif
246 	dev_put(dev);
247 	if (!idev->dead)
248 		pr_err("Freeing alive in_device %p\n", idev);
249 	else
250 		kfree(idev);
251 }
252 EXPORT_SYMBOL(in_dev_finish_destroy);
253 
254 static struct in_device *inetdev_init(struct net_device *dev)
255 {
256 	struct in_device *in_dev;
257 	int err = -ENOMEM;
258 
259 	ASSERT_RTNL();
260 
261 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
262 	if (!in_dev)
263 		goto out;
264 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
265 			sizeof(in_dev->cnf));
266 	in_dev->cnf.sysctl = NULL;
267 	in_dev->dev = dev;
268 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
269 	if (!in_dev->arp_parms)
270 		goto out_kfree;
271 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
272 		dev_disable_lro(dev);
273 	/* Reference in_dev->dev */
274 	dev_hold(dev);
275 	/* Account for reference dev->ip_ptr (below) */
276 	refcount_set(&in_dev->refcnt, 1);
277 
278 	err = devinet_sysctl_register(in_dev);
279 	if (err) {
280 		in_dev->dead = 1;
281 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282 		in_dev_put(in_dev);
283 		in_dev = NULL;
284 		goto out;
285 	}
286 	ip_mc_init_dev(in_dev);
287 	if (dev->flags & IFF_UP)
288 		ip_mc_up(in_dev);
289 
290 	/* we can receive as soon as ip_ptr is set -- do this last */
291 	rcu_assign_pointer(dev->ip_ptr, in_dev);
292 out:
293 	return in_dev ?: ERR_PTR(err);
294 out_kfree:
295 	kfree(in_dev);
296 	in_dev = NULL;
297 	goto out;
298 }
299 
300 static void in_dev_rcu_put(struct rcu_head *head)
301 {
302 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
303 	in_dev_put(idev);
304 }
305 
306 static void inetdev_destroy(struct in_device *in_dev)
307 {
308 	struct net_device *dev;
309 	struct in_ifaddr *ifa;
310 
311 	ASSERT_RTNL();
312 
313 	dev = in_dev->dev;
314 
315 	in_dev->dead = 1;
316 
317 	ip_mc_destroy_dev(in_dev);
318 
319 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
320 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
321 		inet_free_ifa(ifa);
322 	}
323 
324 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
325 
326 	devinet_sysctl_unregister(in_dev);
327 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
328 	arp_ifdown(dev);
329 
330 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
331 }
332 
333 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
334 {
335 	const struct in_ifaddr *ifa;
336 
337 	rcu_read_lock();
338 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
339 		if (inet_ifa_match(a, ifa)) {
340 			if (!b || inet_ifa_match(b, ifa)) {
341 				rcu_read_unlock();
342 				return 1;
343 			}
344 		}
345 	}
346 	rcu_read_unlock();
347 	return 0;
348 }
349 
/* Unlink the address that *@ifap points at from @in_dev and announce it.
 *
 * @in_dev:  device owning the address list
 * @ifap:    list slot currently pointing at the address to remove
 * @destroy: when non-zero the removed address is passed to inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa()
 * @portid:  passed through to rtmsg_ifa()
 *
 * Removing a primary address also affects the secondaries of the same
 * subnet: they are deleted too, or - when IN_DEV_PROMOTE_SECONDARIES() is
 * set - the first of them becomes the new primary. None of that happens
 * when the device is already marked dead. Must be called with RTNL held.
 */
350 static void __inet_del_ifa(struct in_device *in_dev,
351 			   struct in_ifaddr __rcu **ifap,
352 			   int destroy, struct nlmsghdr *nlh, u32 portid)
353 {
354 	struct in_ifaddr *promote = NULL;
355 	struct in_ifaddr *ifa, *ifa1;
356 	struct in_ifaddr *last_prim;
357 	struct in_ifaddr *prev_prom = NULL;
358 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
359 
360 	ASSERT_RTNL();
361 
362 	ifa1 = rtnl_dereference(*ifap);
363 	last_prim = rtnl_dereference(in_dev->ifa_list);
364 	if (in_dev->dead)
365 		goto no_promotions;
366 
367 	/* 1. Deleting primary ifaddr forces deletion all secondaries
368 	 * unless alias promotion is set
369 	 **/
370 
371 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
372 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
373 
374 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the last primary after which a promoted
			 * address will later be re-linked.
			 */
375 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
376 			    ifa1->ifa_scope <= ifa->ifa_scope)
377 				last_prim = ifa;
378 
			/* Skip entries that are not secondaries of the
			 * subnet being removed.
			 */
379 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
380 			    ifa1->ifa_mask != ifa->ifa_mask ||
381 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
382 				ifap1 = &ifa->ifa_next;
383 				prev_prom = ifa;
384 				continue;
385 			}
386 
387 			if (!do_promote) {
388 				inet_hash_remove(ifa);
389 				*ifap1 = ifa->ifa_next;
390 
391 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
392 				blocking_notifier_call_chain(&inetaddr_chain,
393 						NETDEV_DOWN, ifa);
394 				inet_free_ifa(ifa);
395 			} else {
				/* First matching secondary is the one that
				 * gets promoted; stop scanning.
				 */
396 				promote = ifa;
397 				break;
398 			}
399 		}
400 	}
401 
402 	/* On promotion all secondaries from subnet are changing
403 	 * the primary IP, we must remove all their routes silently
404 	 * and later to add them back with new prefsrc. Do this
405 	 * while all addresses are on the device list.
406 	 */
407 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
408 		if (ifa1->ifa_mask == ifa->ifa_mask &&
409 		    inet_ifa_match(ifa1->ifa_address, ifa))
410 			fib_del_ifaddr(ifa, ifa1);
411 	}
412 
413 no_promotions:
414 	/* 2. Unlink it */
415 
416 	*ifap = ifa1->ifa_next;
417 	inet_hash_remove(ifa1);
418 
419 	/* 3. Announce address deletion */
420 
421 	/* Send message first, then call notifier.
422 	   At first sight, FIB update triggered by notifier
423 	   will refer to already deleted ifaddr, that could confuse
424 	   netlink listeners. It is not true: look, gated sees
425 	   that route deleted and if it still thinks that ifaddr
426 	   is valid, it will try to restore deleted routes... Grr.
427 	   So that, this order is correct.
428 	 */
429 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
430 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
431 
432 	if (promote) {
433 		struct in_ifaddr *next_sec;
434 
435 		next_sec = rtnl_dereference(promote->ifa_next);
		/* If other entries preceded the promoted address, move it so
		 * that it directly follows last_prim.
		 */
436 		if (prev_prom) {
437 			struct in_ifaddr *last_sec;
438 
439 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
440 
441 			last_sec = rtnl_dereference(last_prim->ifa_next);
442 			rcu_assign_pointer(promote->ifa_next, last_sec);
443 			rcu_assign_pointer(last_prim->ifa_next, promote);
444 		}
445 
446 		promote->ifa_flags &= ~IFA_F_SECONDARY;
447 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
448 		blocking_notifier_call_chain(&inetaddr_chain,
449 				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of this
		 * subnet that were removed before the unlink.
		 */
450 		for (ifa = next_sec; ifa;
451 		     ifa = rtnl_dereference(ifa->ifa_next)) {
452 			if (ifa1->ifa_mask != ifa->ifa_mask ||
453 			    !inet_ifa_match(ifa1->ifa_address, ifa))
454 					continue;
455 			fib_add_ifaddr(ifa);
456 		}
457 
458 	}
459 	if (destroy)
460 		inet_free_ifa(ifa1);
461 }
462 
463 static void inet_del_ifa(struct in_device *in_dev,
464 			 struct in_ifaddr __rcu **ifap,
465 			 int destroy)
466 {
467 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
468 }
469 
/* Delayed work running check_lifetime(); it is re-queued with zero delay
 * whenever an address is inserted or its lifetime is updated.
 */
470 static void check_lifetime(struct work_struct *work);
471 
472 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
473 
/* Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa:    address to insert; consumed (inet_free_ifa()) on every failure
 *          and also when ifa_local is zero
 * @nlh:    passed through to rtmsg_ifa()
 * @portid: passed through to rtmsg_ifa()
 * @extack: handed to the address validators
 *
 * An address that falls into the subnet of an existing one becomes a
 * secondary and is appended at the tail of the list; a primary is linked
 * after the last primary whose ifa_scope is >= its own. Must be called
 * with RTNL held.
 *
 * Returns 0 on success (and for a zero ifa_local, which is silently
 * dropped), -EEXIST if the same local address already exists in the subnet,
 * -EINVAL on a scope mismatch within the subnet, or the errno reported by
 * the validator chain.
 */
474 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
475 			     u32 portid, struct netlink_ext_ack *extack)
476 {
477 	struct in_ifaddr __rcu **last_primary, **ifap;
478 	struct in_device *in_dev = ifa->ifa_dev;
479 	struct in_validator_info ivi;
480 	struct in_ifaddr *ifa1;
481 	int ret;
482 
483 	ASSERT_RTNL();
484 
485 	if (!ifa->ifa_local) {
486 		inet_free_ifa(ifa);
487 		return 0;
488 	}
489 
490 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
491 	last_primary = &in_dev->ifa_list;
492 
493 	/* Don't set IPv6 only flags to IPv4 addresses */
494 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
495 
496 	ifap = &in_dev->ifa_list;
497 	ifa1 = rtnl_dereference(*ifap);
498 
	/* Walk the whole list: remember the insertion point for a primary,
	 * detect duplicates, and leave ifap at the tail slot.
	 */
499 	while (ifa1) {
500 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
501 		    ifa->ifa_scope <= ifa1->ifa_scope)
502 			last_primary = &ifa1->ifa_next;
503 		if (ifa1->ifa_mask == ifa->ifa_mask &&
504 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
505 			if (ifa1->ifa_local == ifa->ifa_local) {
506 				inet_free_ifa(ifa);
507 				return -EEXIST;
508 			}
509 			if (ifa1->ifa_scope != ifa->ifa_scope) {
510 				inet_free_ifa(ifa);
511 				return -EINVAL;
512 			}
513 			ifa->ifa_flags |= IFA_F_SECONDARY;
514 		}
515 
516 		ifap = &ifa1->ifa_next;
517 		ifa1 = rtnl_dereference(*ifap);
518 	}
519 
520 	/* Allow any devices that wish to register ifaddr validators to weigh
521 	 * in now, before changes are committed.  The rtnl lock is serializing
522 	 * access here, so the state should not change between a validator call
523 	 * and a final notify on commit.  This isn't invoked on promotion under
524 	 * the assumption that validators are checking the address itself, and
525 	 * not the flags.
526 	 */
527 	ivi.ivi_addr = ifa->ifa_address;
528 	ivi.ivi_dev = ifa->ifa_dev;
529 	ivi.extack = extack;
530 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
531 					   NETDEV_UP, &ivi);
532 	ret = notifier_to_errno(ret);
533 	if (ret) {
534 		inet_free_ifa(ifa);
535 		return ret;
536 	}
537 
	/* Primaries go to the slot recorded above; secondaries keep ifap,
	 * which points at the end of the list.
	 */
538 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
539 		prandom_seed((__force u32) ifa->ifa_local);
540 		ifap = last_primary;
541 	}
542 
543 	rcu_assign_pointer(ifa->ifa_next, *ifap);
544 	rcu_assign_pointer(*ifap, ifa);
545 
546 	inet_hash_insert(dev_net(in_dev->dev), ifa);
547 
	/* Run the lifetime checker right away so it accounts for this address. */
548 	cancel_delayed_work(&check_lifetime_work);
549 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
550 
551 	/* Send message first, then call notifier.
552 	   Notifier will trigger FIB update, so that
553 	   listeners of netlink will know about new ifaddr */
554 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
555 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
556 
557 	return 0;
558 }
559 
560 static int inet_insert_ifa(struct in_ifaddr *ifa)
561 {
562 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
563 }
564 
565 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
566 {
567 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
568 
569 	ASSERT_RTNL();
570 
571 	if (!in_dev) {
572 		inet_free_ifa(ifa);
573 		return -ENOBUFS;
574 	}
575 	ipv4_devconf_setall(in_dev);
576 	neigh_parms_data_state_setall(in_dev->arp_parms);
577 	if (ifa->ifa_dev != in_dev) {
578 		WARN_ON(ifa->ifa_dev);
579 		in_dev_hold(in_dev);
580 		ifa->ifa_dev = in_dev;
581 	}
582 	if (ipv4_is_loopback(ifa->ifa_local))
583 		ifa->ifa_scope = RT_SCOPE_HOST;
584 	return inet_insert_ifa(ifa);
585 }
586 
587 /* Caller must hold RCU or RTNL :
588  * We dont take a reference on found in_device
589  */
590 struct in_device *inetdev_by_index(struct net *net, int ifindex)
591 {
592 	struct net_device *dev;
593 	struct in_device *in_dev = NULL;
594 
595 	rcu_read_lock();
596 	dev = dev_get_by_index_rcu(net, ifindex);
597 	if (dev)
598 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
599 	rcu_read_unlock();
600 	return in_dev;
601 }
602 EXPORT_SYMBOL(inetdev_by_index);
603 
604 /* Called only from RTNL semaphored context. No locks. */
605 
606 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
607 				    __be32 mask)
608 {
609 	struct in_ifaddr *ifa;
610 
611 	ASSERT_RTNL();
612 
613 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
614 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
615 			return ifa;
616 	}
617 	return NULL;
618 }
619 
620 static int ip_mc_autojoin_config(struct net *net, bool join,
621 				 const struct in_ifaddr *ifa)
622 {
623 #if defined(CONFIG_IP_MULTICAST)
624 	struct ip_mreqn mreq = {
625 		.imr_multiaddr.s_addr = ifa->ifa_address,
626 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
627 	};
628 	struct sock *sk = net->ipv4.mc_autojoin_sk;
629 	int ret;
630 
631 	ASSERT_RTNL();
632 
633 	lock_sock(sk);
634 	if (join)
635 		ret = ip_mc_join_group(sk, &mreq);
636 	else
637 		ret = ip_mc_leave_group(sk, &mreq);
638 	release_sock(sk);
639 
640 	return ret;
641 #else
642 	return -EOPNOTSUPP;
643 #endif
644 }
645 
646 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
647 			    struct netlink_ext_ack *extack)
648 {
649 	struct net *net = sock_net(skb->sk);
650 	struct in_ifaddr __rcu **ifap;
651 	struct nlattr *tb[IFA_MAX+1];
652 	struct in_device *in_dev;
653 	struct ifaddrmsg *ifm;
654 	struct in_ifaddr *ifa;
655 	int err;
656 
657 	ASSERT_RTNL();
658 
659 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
660 				     ifa_ipv4_policy, extack);
661 	if (err < 0)
662 		goto errout;
663 
664 	ifm = nlmsg_data(nlh);
665 	in_dev = inetdev_by_index(net, ifm->ifa_index);
666 	if (!in_dev) {
667 		err = -ENODEV;
668 		goto errout;
669 	}
670 
671 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
672 	     ifap = &ifa->ifa_next) {
673 		if (tb[IFA_LOCAL] &&
674 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
675 			continue;
676 
677 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
678 			continue;
679 
680 		if (tb[IFA_ADDRESS] &&
681 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
682 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
683 			continue;
684 
685 		if (ipv4_is_multicast(ifa->ifa_address))
686 			ip_mc_autojoin_config(net, false, ifa);
687 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
688 		return 0;
689 	}
690 
691 	err = -EADDRNOTAVAIL;
692 errout:
693 	return err;
694 }
695 
/* Lifetime value that check_lifetime() treats as "never expires"; also the
 * default valid/preferred lifetime for addresses added via netlink.
 */
696 #define INFINITY_LIFE_TIME	0xFFFFFFFF
697 
/* Delayed-work handler that expires and deprecates non-permanent addresses.
 *
 * Each hash bucket is first scanned under rcu_read_lock() to find out
 * whether anything has to change and to compute the next wake-up time.
 * Only buckets that need a change are walked again under RTNL, where
 * addresses past their valid lifetime are deleted and addresses past their
 * preferred lifetime get IFA_F_DEPRECATED plus an RTM_NEWADDR notification.
 * The work then re-queues itself.
 */
698 static void check_lifetime(struct work_struct *work)
699 {
700 	unsigned long now, next, next_sec, next_sched;
701 	struct in_ifaddr *ifa;
702 	struct hlist_node *n;
703 	int i;
704 
705 	now = jiffies;
706 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
707 
708 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
709 		bool change_needed = false;
710 
		/* Pass 1: read-only scan, no RTNL taken. */
711 		rcu_read_lock();
712 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
713 			unsigned long age;
714 
715 			if (ifa->ifa_flags & IFA_F_PERMANENT)
716 				continue;
717 
718 			/* We try to batch several events at once. */
719 			age = (now - ifa->ifa_tstamp +
720 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721 
722 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723 			    age >= ifa->ifa_valid_lft) {
724 				change_needed = true;
725 			} else if (ifa->ifa_preferred_lft ==
726 				   INFINITY_LIFE_TIME) {
727 				continue;
728 			} else if (age >= ifa->ifa_preferred_lft) {
				/* Already past preferred: next event for this
				 * address is the end of its valid lifetime.
				 */
729 				if (time_before(ifa->ifa_tstamp +
730 						ifa->ifa_valid_lft * HZ, next))
731 					next = ifa->ifa_tstamp +
732 					       ifa->ifa_valid_lft * HZ;
733 
734 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
735 					change_needed = true;
736 			} else if (time_before(ifa->ifa_tstamp +
737 					       ifa->ifa_preferred_lft * HZ,
738 					       next)) {
739 				next = ifa->ifa_tstamp +
740 				       ifa->ifa_preferred_lft * HZ;
741 			}
742 		}
743 		rcu_read_unlock();
744 		if (!change_needed)
745 			continue;
		/* Pass 2: apply the changes with RTNL held. */
746 		rtnl_lock();
747 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
748 			unsigned long age;
749 
750 			if (ifa->ifa_flags & IFA_F_PERMANENT)
751 				continue;
752 
753 			/* We try to batch several events at once. */
754 			age = (now - ifa->ifa_tstamp +
755 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
756 
757 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
758 			    age >= ifa->ifa_valid_lft) {
759 				struct in_ifaddr __rcu **ifap;
760 				struct in_ifaddr *tmp;
761 
				/* inet_del_ifa() needs the list slot that
				 * points at ifa, so search the device list.
				 */
762 				ifap = &ifa->ifa_dev->ifa_list;
763 				tmp = rtnl_dereference(*ifap);
764 				while (tmp) {
765 					if (tmp == ifa) {
766 						inet_del_ifa(ifa->ifa_dev,
767 							     ifap, 1);
768 						break;
769 					}
770 					ifap = &tmp->ifa_next;
771 					tmp = rtnl_dereference(*ifap);
772 				}
773 			} else if (ifa->ifa_preferred_lft !=
774 				   INFINITY_LIFE_TIME &&
775 				   age >= ifa->ifa_preferred_lft &&
776 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
777 				ifa->ifa_flags |= IFA_F_DEPRECATED;
778 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
779 			}
780 		}
781 		rtnl_unlock();
782 	}
783 
784 	next_sec = round_jiffies_up(next);
785 	next_sched = next;
786 
787 	/* If rounded timeout is accurate enough, accept it. */
788 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
789 		next_sched = next_sec;
790 
791 	now = jiffies;
792 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
793 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
794 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
795 
796 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
797 			next_sched - now);
798 }
799 
800 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
801 			     __u32 prefered_lft)
802 {
803 	unsigned long timeout;
804 
805 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
806 
807 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
808 	if (addrconf_finite_timeout(timeout))
809 		ifa->ifa_valid_lft = timeout;
810 	else
811 		ifa->ifa_flags |= IFA_F_PERMANENT;
812 
813 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
814 	if (addrconf_finite_timeout(timeout)) {
815 		if (timeout == 0)
816 			ifa->ifa_flags |= IFA_F_DEPRECATED;
817 		ifa->ifa_preferred_lft = timeout;
818 	}
819 	ifa->ifa_tstamp = jiffies;
820 	if (!ifa->ifa_cstamp)
821 		ifa->ifa_cstamp = ifa->ifa_tstamp;
822 }
823 
824 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
825 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
826 				       struct netlink_ext_ack *extack)
827 {
828 	struct nlattr *tb[IFA_MAX+1];
829 	struct in_ifaddr *ifa;
830 	struct ifaddrmsg *ifm;
831 	struct net_device *dev;
832 	struct in_device *in_dev;
833 	int err;
834 
835 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
836 				     ifa_ipv4_policy, extack);
837 	if (err < 0)
838 		goto errout;
839 
840 	ifm = nlmsg_data(nlh);
841 	err = -EINVAL;
842 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
843 		goto errout;
844 
845 	dev = __dev_get_by_index(net, ifm->ifa_index);
846 	err = -ENODEV;
847 	if (!dev)
848 		goto errout;
849 
850 	in_dev = __in_dev_get_rtnl(dev);
851 	err = -ENOBUFS;
852 	if (!in_dev)
853 		goto errout;
854 
855 	ifa = inet_alloc_ifa();
856 	if (!ifa)
857 		/*
858 		 * A potential indev allocation can be left alive, it stays
859 		 * assigned to its device and is destroy with it.
860 		 */
861 		goto errout;
862 
863 	ipv4_devconf_setall(in_dev);
864 	neigh_parms_data_state_setall(in_dev->arp_parms);
865 	in_dev_hold(in_dev);
866 
867 	if (!tb[IFA_ADDRESS])
868 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
869 
870 	INIT_HLIST_NODE(&ifa->hash);
871 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
872 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
873 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
874 					 ifm->ifa_flags;
875 	ifa->ifa_scope = ifm->ifa_scope;
876 	ifa->ifa_dev = in_dev;
877 
878 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
879 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
880 
881 	if (tb[IFA_BROADCAST])
882 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
883 
884 	if (tb[IFA_LABEL])
885 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
886 	else
887 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
888 
889 	if (tb[IFA_RT_PRIORITY])
890 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
891 
892 	if (tb[IFA_CACHEINFO]) {
893 		struct ifa_cacheinfo *ci;
894 
895 		ci = nla_data(tb[IFA_CACHEINFO]);
896 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
897 			err = -EINVAL;
898 			goto errout_free;
899 		}
900 		*pvalid_lft = ci->ifa_valid;
901 		*pprefered_lft = ci->ifa_prefered;
902 	}
903 
904 	return ifa;
905 
906 errout_free:
907 	inet_free_ifa(ifa);
908 errout:
909 	return ERR_PTR(err);
910 }
911 
912 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
913 {
914 	struct in_device *in_dev = ifa->ifa_dev;
915 	struct in_ifaddr *ifa1;
916 
917 	if (!ifa->ifa_local)
918 		return NULL;
919 
920 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
921 		if (ifa1->ifa_mask == ifa->ifa_mask &&
922 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
923 		    ifa1->ifa_local == ifa->ifa_local)
924 			return ifa1;
925 	}
926 	return NULL;
927 }
928 
929 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
930 			    struct netlink_ext_ack *extack)
931 {
932 	struct net *net = sock_net(skb->sk);
933 	struct in_ifaddr *ifa;
934 	struct in_ifaddr *ifa_existing;
935 	__u32 valid_lft = INFINITY_LIFE_TIME;
936 	__u32 prefered_lft = INFINITY_LIFE_TIME;
937 
938 	ASSERT_RTNL();
939 
940 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
941 	if (IS_ERR(ifa))
942 		return PTR_ERR(ifa);
943 
944 	ifa_existing = find_matching_ifa(ifa);
945 	if (!ifa_existing) {
946 		/* It would be best to check for !NLM_F_CREATE here but
947 		 * userspace already relies on not having to provide this.
948 		 */
949 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
950 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
951 			int ret = ip_mc_autojoin_config(net, true, ifa);
952 
953 			if (ret < 0) {
954 				inet_free_ifa(ifa);
955 				return ret;
956 			}
957 		}
958 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
959 					 extack);
960 	} else {
961 		u32 new_metric = ifa->ifa_rt_priority;
962 
963 		inet_free_ifa(ifa);
964 
965 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
966 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
967 			return -EEXIST;
968 		ifa = ifa_existing;
969 
970 		if (ifa->ifa_rt_priority != new_metric) {
971 			fib_modify_prefix_metric(ifa, new_metric);
972 			ifa->ifa_rt_priority = new_metric;
973 		}
974 
975 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
976 		cancel_delayed_work(&check_lifetime_work);
977 		queue_delayed_work(system_power_efficient_wq,
978 				&check_lifetime_work, 0);
979 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
980 	}
981 	return 0;
982 }
983 
984 /*
985  *	Determine a default network mask, based on the IP address.
986  */
987 
988 static int inet_abc_len(__be32 addr)
989 {
990 	int rc = -1;	/* Something else, probably a multicast. */
991 
992 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
993 		rc = 0;
994 	else {
995 		__u32 haddr = ntohl(addr);
996 		if (IN_CLASSA(haddr))
997 			rc = 8;
998 		else if (IN_CLASSB(haddr))
999 			rc = 16;
1000 		else if (IN_CLASSC(haddr))
1001 			rc = 24;
1002 		else if (IN_CLASSE(haddr))
1003 			rc = 32;
1004 	}
1005 
1006 	return rc;
1007 }
1008 
1009 
1010 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1011 {
1012 	struct sockaddr_in sin_orig;
1013 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1014 	struct in_ifaddr __rcu **ifap = NULL;
1015 	struct in_device *in_dev;
1016 	struct in_ifaddr *ifa = NULL;
1017 	struct net_device *dev;
1018 	char *colon;
1019 	int ret = -EFAULT;
1020 	int tryaddrmatch = 0;
1021 
1022 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1023 
1024 	/* save original address for comparison */
1025 	memcpy(&sin_orig, sin, sizeof(*sin));
1026 
1027 	colon = strchr(ifr->ifr_name, ':');
1028 	if (colon)
1029 		*colon = 0;
1030 
1031 	dev_load(net, ifr->ifr_name);
1032 
1033 	switch (cmd) {
1034 	case SIOCGIFADDR:	/* Get interface address */
1035 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1036 	case SIOCGIFDSTADDR:	/* Get the destination address */
1037 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1038 		/* Note that these ioctls will not sleep,
1039 		   so that we do not impose a lock.
1040 		   One day we will be forced to put shlock here (I mean SMP)
1041 		 */
1042 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1043 		memset(sin, 0, sizeof(*sin));
1044 		sin->sin_family = AF_INET;
1045 		break;
1046 
1047 	case SIOCSIFFLAGS:
1048 		ret = -EPERM;
1049 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1050 			goto out;
1051 		break;
1052 	case SIOCSIFADDR:	/* Set interface address (and family) */
1053 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1054 	case SIOCSIFDSTADDR:	/* Set the destination address */
1055 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1056 		ret = -EPERM;
1057 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1058 			goto out;
1059 		ret = -EINVAL;
1060 		if (sin->sin_family != AF_INET)
1061 			goto out;
1062 		break;
1063 	default:
1064 		ret = -EINVAL;
1065 		goto out;
1066 	}
1067 
1068 	rtnl_lock();
1069 
1070 	ret = -ENODEV;
1071 	dev = __dev_get_by_name(net, ifr->ifr_name);
1072 	if (!dev)
1073 		goto done;
1074 
1075 	if (colon)
1076 		*colon = ':';
1077 
1078 	in_dev = __in_dev_get_rtnl(dev);
1079 	if (in_dev) {
1080 		if (tryaddrmatch) {
1081 			/* Matthias Andree */
1082 			/* compare label and address (4.4BSD style) */
1083 			/* note: we only do this for a limited set of ioctls
1084 			   and only if the original address family was AF_INET.
1085 			   This is checked above. */
1086 
1087 			for (ifap = &in_dev->ifa_list;
1088 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1089 			     ifap = &ifa->ifa_next) {
1090 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1091 				    sin_orig.sin_addr.s_addr ==
1092 							ifa->ifa_local) {
1093 					break; /* found */
1094 				}
1095 			}
1096 		}
1097 		/* we didn't get a match, maybe the application is
1098 		   4.3BSD-style and passed in junk so we fall back to
1099 		   comparing just the label */
1100 		if (!ifa) {
1101 			for (ifap = &in_dev->ifa_list;
1102 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1103 			     ifap = &ifa->ifa_next)
1104 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1105 					break;
1106 		}
1107 	}
1108 
1109 	ret = -EADDRNOTAVAIL;
1110 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1111 		goto done;
1112 
1113 	switch (cmd) {
1114 	case SIOCGIFADDR:	/* Get interface address */
1115 		ret = 0;
1116 		sin->sin_addr.s_addr = ifa->ifa_local;
1117 		break;
1118 
1119 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1120 		ret = 0;
1121 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1122 		break;
1123 
1124 	case SIOCGIFDSTADDR:	/* Get the destination address */
1125 		ret = 0;
1126 		sin->sin_addr.s_addr = ifa->ifa_address;
1127 		break;
1128 
1129 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1130 		ret = 0;
1131 		sin->sin_addr.s_addr = ifa->ifa_mask;
1132 		break;
1133 
1134 	case SIOCSIFFLAGS:
1135 		if (colon) {
1136 			ret = -EADDRNOTAVAIL;
1137 			if (!ifa)
1138 				break;
1139 			ret = 0;
1140 			if (!(ifr->ifr_flags & IFF_UP))
1141 				inet_del_ifa(in_dev, ifap, 1);
1142 			break;
1143 		}
1144 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1145 		break;
1146 
1147 	case SIOCSIFADDR:	/* Set interface address (and family) */
1148 		ret = -EINVAL;
1149 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1150 			break;
1151 
1152 		if (!ifa) {
1153 			ret = -ENOBUFS;
1154 			ifa = inet_alloc_ifa();
1155 			if (!ifa)
1156 				break;
1157 			INIT_HLIST_NODE(&ifa->hash);
1158 			if (colon)
1159 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1160 			else
1161 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1162 		} else {
1163 			ret = 0;
1164 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1165 				break;
1166 			inet_del_ifa(in_dev, ifap, 0);
1167 			ifa->ifa_broadcast = 0;
1168 			ifa->ifa_scope = 0;
1169 		}
1170 
1171 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1172 
1173 		if (!(dev->flags & IFF_POINTOPOINT)) {
1174 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1175 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1176 			if ((dev->flags & IFF_BROADCAST) &&
1177 			    ifa->ifa_prefixlen < 31)
1178 				ifa->ifa_broadcast = ifa->ifa_address |
1179 						     ~ifa->ifa_mask;
1180 		} else {
1181 			ifa->ifa_prefixlen = 32;
1182 			ifa->ifa_mask = inet_make_mask(32);
1183 		}
1184 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1185 		ret = inet_set_ifa(dev, ifa);
1186 		break;
1187 
1188 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1189 		ret = 0;
1190 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1191 			inet_del_ifa(in_dev, ifap, 0);
1192 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1193 			inet_insert_ifa(ifa);
1194 		}
1195 		break;
1196 
1197 	case SIOCSIFDSTADDR:	/* Set the destination address */
1198 		ret = 0;
1199 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1200 			break;
1201 		ret = -EINVAL;
1202 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1203 			break;
1204 		ret = 0;
1205 		inet_del_ifa(in_dev, ifap, 0);
1206 		ifa->ifa_address = sin->sin_addr.s_addr;
1207 		inet_insert_ifa(ifa);
1208 		break;
1209 
1210 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1211 
1212 		/*
1213 		 *	The mask we set must be legal.
1214 		 */
1215 		ret = -EINVAL;
1216 		if (bad_mask(sin->sin_addr.s_addr, 0))
1217 			break;
1218 		ret = 0;
1219 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1220 			__be32 old_mask = ifa->ifa_mask;
1221 			inet_del_ifa(in_dev, ifap, 0);
1222 			ifa->ifa_mask = sin->sin_addr.s_addr;
1223 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1224 
1225 			/* See if current broadcast address matches
1226 			 * with current netmask, then recalculate
1227 			 * the broadcast address. Otherwise it's a
1228 			 * funny address, so don't touch it since
1229 			 * the user seems to know what (s)he's doing...
1230 			 */
1231 			if ((dev->flags & IFF_BROADCAST) &&
1232 			    (ifa->ifa_prefixlen < 31) &&
1233 			    (ifa->ifa_broadcast ==
1234 			     (ifa->ifa_local|~old_mask))) {
1235 				ifa->ifa_broadcast = (ifa->ifa_local |
1236 						      ~sin->sin_addr.s_addr);
1237 			}
1238 			inet_insert_ifa(ifa);
1239 		}
1240 		break;
1241 	}
1242 done:
1243 	rtnl_unlock();
1244 out:
1245 	return ret;
1246 }
1247 
1248 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1249 {
1250 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1251 	const struct in_ifaddr *ifa;
1252 	struct ifreq ifr;
1253 	int done = 0;
1254 
1255 	if (WARN_ON(size > sizeof(struct ifreq)))
1256 		goto out;
1257 
1258 	if (!in_dev)
1259 		goto out;
1260 
1261 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1262 		if (!buf) {
1263 			done += size;
1264 			continue;
1265 		}
1266 		if (len < size)
1267 			break;
1268 		memset(&ifr, 0, sizeof(struct ifreq));
1269 		strcpy(ifr.ifr_name, ifa->ifa_label);
1270 
1271 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1272 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1273 								ifa->ifa_local;
1274 
1275 		if (copy_to_user(buf + done, &ifr, size)) {
1276 			done = -EFAULT;
1277 			break;
1278 		}
1279 		len  -= size;
1280 		done += size;
1281 	}
1282 out:
1283 	return done;
1284 }
1285 
1286 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1287 				 int scope)
1288 {
1289 	const struct in_ifaddr *ifa;
1290 
1291 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1292 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1293 			continue;
1294 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1295 		    ifa->ifa_scope <= scope)
1296 			return ifa->ifa_local;
1297 	}
1298 
1299 	return 0;
1300 }
1301 
/*
 * Select a local IPv4 address to use on behalf of @dev.
 *
 * @dev:   device the address is wanted for
 * @dst:   destination used to prefer an address on a matching subnet;
 *         0 means "no preference"
 * @scope: largest scope value an address may have to be eligible
 *
 * The search runs under rcu_read_lock() in three stages, stopping at the
 * first non-zero result:
 *  1. the primary (non-secondary) addresses of @dev itself;
 *  2. the L3 master device of @dev, if l3mdev_master_ifindex_rcu()
 *     reports one;
 *  3. every device in the namespace that has the same L3 master index.
 *
 * Returns the selected address, or 0 if none was found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	unsigned char localnet_scope = RT_SCOPE_HOST;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* With route_localnet set, address scopes are capped at
	 * RT_SCOPE_LINK instead of RT_SCOPE_HOST in the check below.
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (min(ifa->ifa_scope, localnet_scope) > scope)
			continue;
		/* An address matching dst (or any address when dst is 0)
		 * wins immediately.
		 */
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Otherwise remember the first eligible address as a
		 * fallback and keep looking for a subnet match.
		 */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		/* Only consider devices sharing the same L3 master. */
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1371 
/*
 * Check whether @in_dev carries an address satisfying the (@dst, @local,
 * @scope) constraints and return the chosen local address, or 0.
 *
 * Two pieces of state are built while walking the address list:
 *  - addr: the first address whose effective scope is <= @scope and
 *          which equals @local (any address qualifies when @local is 0);
 *  - same: set once an address is seen whose subnet contains @local (when
 *          non-zero) and @dst (when non-zero).
 * addr is only returned if 'same' is set when the walk ends.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	/* With route_localnet set, scopes are capped at RT_SCOPE_LINK
	 * rather than RT_SCOPE_HOST.
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* Pick the candidate address once; if the subnet condition
		 * was already met by an earlier entry we are done.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit local address, or no dst to
				 * compare against: current addr is fine.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1415 
1416 /*
1417  * Confirm that local IP address exists using wildcards:
1418  * - net: netns to check, cannot be NULL
1419  * - in_dev: only on this interface, NULL=any interface
1420  * - dst: only in the same subnet as dst, 0=any dst
1421  * - local: address, 0=autoselect the local address
1422  * - scope: maximum allowed scope value for the local address
1423  */
1424 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1425 			 __be32 dst, __be32 local, int scope)
1426 {
1427 	__be32 addr = 0;
1428 	struct net_device *dev;
1429 
1430 	if (in_dev)
1431 		return confirm_addr_indev(in_dev, dst, local, scope);
1432 
1433 	rcu_read_lock();
1434 	for_each_netdev_rcu(net, dev) {
1435 		in_dev = __in_dev_get_rcu(dev);
1436 		if (in_dev) {
1437 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1438 			if (addr)
1439 				break;
1440 		}
1441 	}
1442 	rcu_read_unlock();
1443 
1444 	return addr;
1445 }
1446 EXPORT_SYMBOL(inet_confirm_addr);
1447 
/*
 *	Registration helpers for the IPv4 address notifier chains
 *	(inetaddr_chain and inetaddr_validator_chain)
 */
1451 
1452 int register_inetaddr_notifier(struct notifier_block *nb)
1453 {
1454 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1455 }
1456 EXPORT_SYMBOL(register_inetaddr_notifier);
1457 
1458 int unregister_inetaddr_notifier(struct notifier_block *nb)
1459 {
1460 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1461 }
1462 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1463 
1464 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1465 {
1466 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1467 }
1468 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1469 
1470 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1471 {
1472 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1473 	    nb);
1474 }
1475 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1476 
1477 /* Rename ifa_labels for a device name change. Make some effort to preserve
1478  * existing alias numbering and to create unique labels if possible.
1479 */
1480 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1481 {
1482 	struct in_ifaddr *ifa;
1483 	int named = 0;
1484 
1485 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1486 		char old[IFNAMSIZ], *dot;
1487 
1488 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1489 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1490 		if (named++ == 0)
1491 			goto skip;
1492 		dot = strchr(old, ':');
1493 		if (!dot) {
1494 			sprintf(old, ":%d", named);
1495 			dot = old;
1496 		}
1497 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1498 			strcat(ifa->ifa_label, dot);
1499 		else
1500 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1501 skip:
1502 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1503 	}
1504 }
1505 
1506 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1507 					struct in_device *in_dev)
1508 
1509 {
1510 	const struct in_ifaddr *ifa;
1511 
1512 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1513 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1514 			 ifa->ifa_local, dev,
1515 			 ifa->ifa_local, NULL,
1516 			 dev->dev_addr, NULL);
1517 	}
1518 }
1519 
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback that keeps the IPv4 per-device state
 * (struct in_device) in step with device life-cycle events.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER, in which case the error is converted with
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only REGISTER and CHANGEMTU can create it,
	 * every other event is ignored.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * unexpected: log it and clear the device's pointer.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Give the loopback device INADDR_LOOPBACK/8 with
			 * host scope and infinite lifetimes.
			 */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce themselves via ARP when
		 * arp_notify is enabled on the device.
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 *
		 * NOTE(review): inetdev_changename() calls
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every relabelled address,
		 * so the statement above looks stale - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries for this in_device. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1615 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1619 
1620 static size_t inet_nlmsg_size(void)
1621 {
1622 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1623 	       + nla_total_size(4) /* IFA_ADDRESS */
1624 	       + nla_total_size(4) /* IFA_LOCAL */
1625 	       + nla_total_size(4) /* IFA_BROADCAST */
1626 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1627 	       + nla_total_size(4)  /* IFA_FLAGS */
1628 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1629 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1630 }
1631 
1632 static inline u32 cstamp_delta(unsigned long cstamp)
1633 {
1634 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1635 }
1636 
1637 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1638 			 unsigned long tstamp, u32 preferred, u32 valid)
1639 {
1640 	struct ifa_cacheinfo ci;
1641 
1642 	ci.cstamp = cstamp_delta(cstamp);
1643 	ci.tstamp = cstamp_delta(tstamp);
1644 	ci.ifa_prefered = preferred;
1645 	ci.ifa_valid = valid;
1646 
1647 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1648 }
1649 
1650 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1651 			    struct inet_fill_args *args)
1652 {
1653 	struct ifaddrmsg *ifm;
1654 	struct nlmsghdr  *nlh;
1655 	u32 preferred, valid;
1656 
1657 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1658 			args->flags);
1659 	if (!nlh)
1660 		return -EMSGSIZE;
1661 
1662 	ifm = nlmsg_data(nlh);
1663 	ifm->ifa_family = AF_INET;
1664 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1665 	ifm->ifa_flags = ifa->ifa_flags;
1666 	ifm->ifa_scope = ifa->ifa_scope;
1667 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1668 
1669 	if (args->netnsid >= 0 &&
1670 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1671 		goto nla_put_failure;
1672 
1673 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1674 		preferred = ifa->ifa_preferred_lft;
1675 		valid = ifa->ifa_valid_lft;
1676 		if (preferred != INFINITY_LIFE_TIME) {
1677 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1678 
1679 			if (preferred > tval)
1680 				preferred -= tval;
1681 			else
1682 				preferred = 0;
1683 			if (valid != INFINITY_LIFE_TIME) {
1684 				if (valid > tval)
1685 					valid -= tval;
1686 				else
1687 					valid = 0;
1688 			}
1689 		}
1690 	} else {
1691 		preferred = INFINITY_LIFE_TIME;
1692 		valid = INFINITY_LIFE_TIME;
1693 	}
1694 	if ((ifa->ifa_address &&
1695 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1696 	    (ifa->ifa_local &&
1697 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1698 	    (ifa->ifa_broadcast &&
1699 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1700 	    (ifa->ifa_label[0] &&
1701 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1702 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1703 	    (ifa->ifa_rt_priority &&
1704 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1705 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1706 			  preferred, valid))
1707 		goto nla_put_failure;
1708 
1709 	nlmsg_end(skb, nlh);
1710 	return 0;
1711 
1712 nla_put_failure:
1713 	nlmsg_cancel(skb, nlh);
1714 	return -EMSGSIZE;
1715 }
1716 
1717 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1718 				      struct inet_fill_args *fillargs,
1719 				      struct net **tgt_net, struct sock *sk,
1720 				      struct netlink_callback *cb)
1721 {
1722 	struct netlink_ext_ack *extack = cb->extack;
1723 	struct nlattr *tb[IFA_MAX+1];
1724 	struct ifaddrmsg *ifm;
1725 	int err, i;
1726 
1727 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1728 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1729 		return -EINVAL;
1730 	}
1731 
1732 	ifm = nlmsg_data(nlh);
1733 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1734 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1735 		return -EINVAL;
1736 	}
1737 
1738 	fillargs->ifindex = ifm->ifa_index;
1739 	if (fillargs->ifindex) {
1740 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1741 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1742 	}
1743 
1744 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1745 					    ifa_ipv4_policy, extack);
1746 	if (err < 0)
1747 		return err;
1748 
1749 	for (i = 0; i <= IFA_MAX; ++i) {
1750 		if (!tb[i])
1751 			continue;
1752 
1753 		if (i == IFA_TARGET_NETNSID) {
1754 			struct net *net;
1755 
1756 			fillargs->netnsid = nla_get_s32(tb[i]);
1757 
1758 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1759 			if (IS_ERR(net)) {
1760 				fillargs->netnsid = -1;
1761 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1762 				return PTR_ERR(net);
1763 			}
1764 			*tgt_net = net;
1765 		} else {
1766 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1767 			return -EINVAL;
1768 		}
1769 	}
1770 
1771 	return 0;
1772 }
1773 
1774 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1775 			    struct netlink_callback *cb, int s_ip_idx,
1776 			    struct inet_fill_args *fillargs)
1777 {
1778 	struct in_ifaddr *ifa;
1779 	int ip_idx = 0;
1780 	int err;
1781 
1782 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1783 		if (ip_idx < s_ip_idx) {
1784 			ip_idx++;
1785 			continue;
1786 		}
1787 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1788 		if (err < 0)
1789 			goto done;
1790 
1791 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1792 		ip_idx++;
1793 	}
1794 	err = 0;
1795 
1796 done:
1797 	cb->args[2] = ip_idx;
1798 
1799 	return err;
1800 }
1801 
/*
 * Netlink dump callback producing RTM_NEWADDR messages for IPv4 addresses.
 *
 * Resume state is kept in cb->args[]:
 *   args[0] - device index hash bucket to resume from
 *   args[1] - position of the device within that bucket
 *   args[2] - position of the address within that device (written by
 *             in_dev_dump_addr())
 *
 * With strict checking the request is validated first and may select a
 * single device (ifindex) and/or a target network namespace.
 *
 * Returns skb->len when anything is in the skb, otherwise err (0 or a
 * negative errno).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Filtered dump: only the requested device is visited and
		 * the hash walk below is skipped entirely.
		 */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* Walk every bucket of the device index hash, starting at the saved
	 * bucket; s_idx only applies to the first bucket visited.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* The saved address offset is only meaningful for
			 * the device the previous pass stopped on.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/* netnsid >= 0 means inet_valid_dump_ifaddr_req() resolved a target
	 * namespace into tgt_net; release it here.
	 */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1885 
1886 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1887 		      u32 portid)
1888 {
1889 	struct inet_fill_args fillargs = {
1890 		.portid = portid,
1891 		.seq = nlh ? nlh->nlmsg_seq : 0,
1892 		.event = event,
1893 		.flags = 0,
1894 		.netnsid = -1,
1895 	};
1896 	struct sk_buff *skb;
1897 	int err = -ENOBUFS;
1898 	struct net *net;
1899 
1900 	net = dev_net(ifa->ifa_dev->dev);
1901 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1902 	if (!skb)
1903 		goto errout;
1904 
1905 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1906 	if (err < 0) {
1907 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1908 		WARN_ON(err == -EMSGSIZE);
1909 		kfree_skb(skb);
1910 		goto errout;
1911 	}
1912 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1913 	return;
1914 errout:
1915 	if (err < 0)
1916 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1917 }
1918 
1919 static size_t inet_get_link_af_size(const struct net_device *dev,
1920 				    u32 ext_filter_mask)
1921 {
1922 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1923 
1924 	if (!in_dev)
1925 		return 0;
1926 
1927 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1928 }
1929 
1930 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1931 			     u32 ext_filter_mask)
1932 {
1933 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1934 	struct nlattr *nla;
1935 	int i;
1936 
1937 	if (!in_dev)
1938 		return -ENODATA;
1939 
1940 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1941 	if (!nla)
1942 		return -EMSGSIZE;
1943 
1944 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1945 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1946 
1947 	return 0;
1948 }
1949 
/* Attribute policy for the IPv4 link attributes: IFLA_INET_CONF must be a
 * nested attribute. Passed to nla_parse_nested_deprecated() in
 * inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1953 
1954 static int inet_validate_link_af(const struct net_device *dev,
1955 				 const struct nlattr *nla,
1956 				 struct netlink_ext_ack *extack)
1957 {
1958 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1959 	int err, rem;
1960 
1961 	if (dev && !__in_dev_get_rtnl(dev))
1962 		return -EAFNOSUPPORT;
1963 
1964 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1965 					  inet_af_policy, extack);
1966 	if (err < 0)
1967 		return err;
1968 
1969 	if (tb[IFLA_INET_CONF]) {
1970 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1971 			int cfgid = nla_type(a);
1972 
1973 			if (nla_len(a) < 4)
1974 				return -EINVAL;
1975 
1976 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1977 				return -EINVAL;
1978 		}
1979 	}
1980 
1981 	return 0;
1982 }
1983 
1984 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1985 			    struct netlink_ext_ack *extack)
1986 {
1987 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1988 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1989 	int rem;
1990 
1991 	if (!in_dev)
1992 		return -EAFNOSUPPORT;
1993 
1994 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1995 		return -EINVAL;
1996 
1997 	if (tb[IFLA_INET_CONF]) {
1998 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1999 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2000 	}
2001 
2002 	return 0;
2003 }
2004 
2005 static int inet_netconf_msgsize_devconf(int type)
2006 {
2007 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2008 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2009 	bool all = false;
2010 
2011 	if (type == NETCONFA_ALL)
2012 		all = true;
2013 
2014 	if (all || type == NETCONFA_FORWARDING)
2015 		size += nla_total_size(4);
2016 	if (all || type == NETCONFA_RP_FILTER)
2017 		size += nla_total_size(4);
2018 	if (all || type == NETCONFA_MC_FORWARDING)
2019 		size += nla_total_size(4);
2020 	if (all || type == NETCONFA_BC_FORWARDING)
2021 		size += nla_total_size(4);
2022 	if (all || type == NETCONFA_PROXY_NEIGH)
2023 		size += nla_total_size(4);
2024 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2025 		size += nla_total_size(4);
2026 
2027 	return size;
2028 }
2029 
2030 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2031 				     struct ipv4_devconf *devconf, u32 portid,
2032 				     u32 seq, int event, unsigned int flags,
2033 				     int type)
2034 {
2035 	struct nlmsghdr  *nlh;
2036 	struct netconfmsg *ncm;
2037 	bool all = false;
2038 
2039 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2040 			flags);
2041 	if (!nlh)
2042 		return -EMSGSIZE;
2043 
2044 	if (type == NETCONFA_ALL)
2045 		all = true;
2046 
2047 	ncm = nlmsg_data(nlh);
2048 	ncm->ncm_family = AF_INET;
2049 
2050 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2051 		goto nla_put_failure;
2052 
2053 	if (!devconf)
2054 		goto out;
2055 
2056 	if ((all || type == NETCONFA_FORWARDING) &&
2057 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2058 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2059 		goto nla_put_failure;
2060 	if ((all || type == NETCONFA_RP_FILTER) &&
2061 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2062 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2063 		goto nla_put_failure;
2064 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2065 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2066 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2067 		goto nla_put_failure;
2068 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2069 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2070 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2071 		goto nla_put_failure;
2072 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2073 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2074 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2075 		goto nla_put_failure;
2076 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2077 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2078 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2079 		goto nla_put_failure;
2080 
2081 out:
2082 	nlmsg_end(skb, nlh);
2083 	return 0;
2084 
2085 nla_put_failure:
2086 	nlmsg_cancel(skb, nlh);
2087 	return -EMSGSIZE;
2088 }
2089 
2090 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2091 				 int ifindex, struct ipv4_devconf *devconf)
2092 {
2093 	struct sk_buff *skb;
2094 	int err = -ENOBUFS;
2095 
2096 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2097 	if (!skb)
2098 		goto errout;
2099 
2100 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2101 					event, 0, type);
2102 	if (err < 0) {
2103 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2104 		WARN_ON(err == -EMSGSIZE);
2105 		kfree_skb(skb);
2106 		goto errout;
2107 	}
2108 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2109 	return;
2110 errout:
2111 	if (err < 0)
2112 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2113 }
2114 
/* Attribute policy used when parsing netconf get requests (see
 * inet_netconf_valid_get_req()). Each listed attribute has its .len set
 * to sizeof(int).
 *
 * NOTE(review): NETCONFA_MC_FORWARDING and NETCONFA_BC_FORWARDING are
 * emitted by inet_netconf_fill_devconf() but have no entry here -
 * presumably intentional since only NETCONFA_IFINDEX is read from
 * requests; confirm.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2122 
2123 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2124 				      const struct nlmsghdr *nlh,
2125 				      struct nlattr **tb,
2126 				      struct netlink_ext_ack *extack)
2127 {
2128 	int i, err;
2129 
2130 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2131 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2132 		return -EINVAL;
2133 	}
2134 
2135 	if (!netlink_strict_get_check(skb))
2136 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2137 					      tb, NETCONFA_MAX,
2138 					      devconf_ipv4_policy, extack);
2139 
2140 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2141 					    tb, NETCONFA_MAX,
2142 					    devconf_ipv4_policy, extack);
2143 	if (err)
2144 		return err;
2145 
2146 	for (i = 0; i <= NETCONFA_MAX; i++) {
2147 		if (!tb[i])
2148 			continue;
2149 
2150 		switch (i) {
2151 		case NETCONFA_IFINDEX:
2152 			break;
2153 		default:
2154 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2155 			return -EINVAL;
2156 		}
2157 	}
2158 
2159 	return 0;
2160 }
2161 
2162 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2163 				    struct nlmsghdr *nlh,
2164 				    struct netlink_ext_ack *extack)
2165 {
2166 	struct net *net = sock_net(in_skb->sk);
2167 	struct nlattr *tb[NETCONFA_MAX+1];
2168 	struct sk_buff *skb;
2169 	struct ipv4_devconf *devconf;
2170 	struct in_device *in_dev;
2171 	struct net_device *dev;
2172 	int ifindex;
2173 	int err;
2174 
2175 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2176 	if (err)
2177 		goto errout;
2178 
2179 	err = -EINVAL;
2180 	if (!tb[NETCONFA_IFINDEX])
2181 		goto errout;
2182 
2183 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2184 	switch (ifindex) {
2185 	case NETCONFA_IFINDEX_ALL:
2186 		devconf = net->ipv4.devconf_all;
2187 		break;
2188 	case NETCONFA_IFINDEX_DEFAULT:
2189 		devconf = net->ipv4.devconf_dflt;
2190 		break;
2191 	default:
2192 		dev = __dev_get_by_index(net, ifindex);
2193 		if (!dev)
2194 			goto errout;
2195 		in_dev = __in_dev_get_rtnl(dev);
2196 		if (!in_dev)
2197 			goto errout;
2198 		devconf = &in_dev->cnf;
2199 		break;
2200 	}
2201 
2202 	err = -ENOBUFS;
2203 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2204 	if (!skb)
2205 		goto errout;
2206 
2207 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2208 					NETLINK_CB(in_skb).portid,
2209 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2210 					NETCONFA_ALL);
2211 	if (err < 0) {
2212 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2213 		WARN_ON(err == -EMSGSIZE);
2214 		kfree_skb(skb);
2215 		goto errout;
2216 	}
2217 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2218 errout:
2219 	return err;
2220 }
2221 
/*
 * Dump callback for netconf: emits one RTM_NEWNETCONF message per
 * device that has IPv4 state, followed by the "all" and "default"
 * configuration blocks.
 *
 * Progress is kept in cb->args[] so the dump can be resumed:
 *   args[0] - hash bucket index (h); the values NETDEV_HASHENTRIES and
 *             NETDEV_HASHENTRIES + 1 mean the "all" resp. "default"
 *             entry is the next one to emit
 *   args[1] - position inside the current bucket (idx)
 *
 * Returns -EINVAL when strict checking rejects the request, otherwise
 * skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		/* A full netconfmsg header is required ... */
		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		/* ... and nothing may follow it. */
		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/* s_idx only applies to the first bucket visited; later ones start at 0. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip entries handled by an earlier invocation. */
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			/* Fill failed: save the position and stop for now. */
			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All buckets walked: emit the "all" block, then bump h to record it. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Finally emit the "default" block. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	/* Remember the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2308 
2309 #ifdef CONFIG_SYSCTL
2310 
2311 static void devinet_copy_dflt_conf(struct net *net, int i)
2312 {
2313 	struct net_device *dev;
2314 
2315 	rcu_read_lock();
2316 	for_each_netdev_rcu(net, dev) {
2317 		struct in_device *in_dev;
2318 
2319 		in_dev = __in_dev_get_rcu(dev);
2320 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2321 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2322 	}
2323 	rcu_read_unlock();
2324 }
2325 
2326 /* called with RTNL locked */
2327 static void inet_forward_change(struct net *net)
2328 {
2329 	struct net_device *dev;
2330 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2331 
2332 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2333 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2334 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2335 				    NETCONFA_FORWARDING,
2336 				    NETCONFA_IFINDEX_ALL,
2337 				    net->ipv4.devconf_all);
2338 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2339 				    NETCONFA_FORWARDING,
2340 				    NETCONFA_IFINDEX_DEFAULT,
2341 				    net->ipv4.devconf_dflt);
2342 
2343 	for_each_netdev(net, dev) {
2344 		struct in_device *in_dev;
2345 
2346 		if (on)
2347 			dev_disable_lro(dev);
2348 
2349 		in_dev = __in_dev_get_rtnl(dev);
2350 		if (in_dev) {
2351 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2352 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2353 						    NETCONFA_FORWARDING,
2354 						    dev->ifindex, &in_dev->cnf);
2355 		}
2356 	}
2357 }
2358 
2359 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2360 {
2361 	if (cnf == net->ipv4.devconf_dflt)
2362 		return NETCONFA_IFINDEX_DEFAULT;
2363 	else if (cnf == net->ipv4.devconf_all)
2364 		return NETCONFA_IFINDEX_ALL;
2365 	else {
2366 		struct in_device *idev
2367 			= container_of(cnf, struct in_device, cnf);
2368 		return idev->dev->ifindex;
2369 	}
2370 }
2371 
2372 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2373 			     void *buffer, size_t *lenp, loff_t *ppos)
2374 {
2375 	int old_value = *(int *)ctl->data;
2376 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2377 	int new_value = *(int *)ctl->data;
2378 
2379 	if (write) {
2380 		struct ipv4_devconf *cnf = ctl->extra1;
2381 		struct net *net = ctl->extra2;
2382 		int i = (int *)ctl->data - cnf->data;
2383 		int ifindex;
2384 
2385 		set_bit(i, cnf->state);
2386 
2387 		if (cnf == net->ipv4.devconf_dflt)
2388 			devinet_copy_dflt_conf(net, i);
2389 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2390 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2391 			if ((new_value == 0) && (old_value != 0))
2392 				rt_cache_flush(net);
2393 
2394 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2395 		    new_value != old_value)
2396 			rt_cache_flush(net);
2397 
2398 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2399 		    new_value != old_value) {
2400 			ifindex = devinet_conf_ifindex(net, cnf);
2401 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2402 						    NETCONFA_RP_FILTER,
2403 						    ifindex, cnf);
2404 		}
2405 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2406 		    new_value != old_value) {
2407 			ifindex = devinet_conf_ifindex(net, cnf);
2408 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2409 						    NETCONFA_PROXY_NEIGH,
2410 						    ifindex, cnf);
2411 		}
2412 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2413 		    new_value != old_value) {
2414 			ifindex = devinet_conf_ifindex(net, cnf);
2415 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2416 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2417 						    ifindex, cnf);
2418 		}
2419 	}
2420 
2421 	return ret;
2422 }
2423 
/*
 * sysctl handler for the "forwarding" entries (and for ip_forward, see
 * ctl_forward_entry).
 *
 * Writes require CAP_NET_ADMIN in the user namespace of the owning net
 * and fail with -EPERM otherwise. When a write changes the value:
 *   - "default" entry: only a netconf notification is sent;
 *   - "all" entry: inet_forward_change() applies it to the namespace;
 *   - per-device entry: LRO is disabled when forwarding is turned on
 *     and a netconf notification is sent.
 * The latter two run under RTNL and are followed by a route cache flush.
 *
 * Returns the result of proc_dointvec(), -EPERM, or the result of
 * restart_syscall() when RTNL could not be taken.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	/* Snapshot value and file position so a restart can undo the write. */
	int val = *valp;
	loff_t pos = *ppos;
	struct net *net = ctl->extra2;
	int ret;

	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			/* Do not block on RTNL here; back out and retry instead. */
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				/* Per-device entry: extra1 is the cnf embedded in its in_device. */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* "default" entry: nothing to apply, just announce the change. */
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2470 
2471 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2472 				void *buffer, size_t *lenp, loff_t *ppos)
2473 {
2474 	int *valp = ctl->data;
2475 	int val = *valp;
2476 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2477 	struct net *net = ctl->extra2;
2478 
2479 	if (write && *valp != val)
2480 		rt_cache_flush(net);
2481 
2482 	return ret;
2483 }
2484 
/*
 * Build one ctl_table initializer for the IPV4_DEVCONF_<attr> entry.
 * .data and .extra1 point into the global ipv4_devconf template here;
 * __devinet_sysctl_register() rebases them onto the actual block.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2507 
/*
 * Template for the sysctl table of one configuration block.
 * __devinet_sysctl_register() duplicates it and fixes up the pointers
 * of all but the last devinet_vars[] slot.
 * NOTE(review): the last slot is presumably the empty terminator entry;
 * confirm that the number of entries below matches __IPV4_DEVCONF_MAX - 1.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* Forwarding has its own handler, devinet_sysctl_forward(). */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* Entries below flush the route cache whenever their value changes. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2561 
2562 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2563 				     int ifindex, struct ipv4_devconf *p)
2564 {
2565 	int i;
2566 	struct devinet_sysctl_table *t;
2567 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2568 
2569 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2570 	if (!t)
2571 		goto out;
2572 
2573 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2574 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2575 		t->devinet_vars[i].extra1 = p;
2576 		t->devinet_vars[i].extra2 = net;
2577 	}
2578 
2579 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2580 
2581 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2582 	if (!t->sysctl_header)
2583 		goto free;
2584 
2585 	p->sysctl = t;
2586 
2587 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2588 				    ifindex, p);
2589 	return 0;
2590 
2591 free:
2592 	kfree(t);
2593 out:
2594 	return -ENOMEM;
2595 }
2596 
2597 static void __devinet_sysctl_unregister(struct net *net,
2598 					struct ipv4_devconf *cnf, int ifindex)
2599 {
2600 	struct devinet_sysctl_table *t = cnf->sysctl;
2601 
2602 	if (t) {
2603 		cnf->sysctl = NULL;
2604 		unregister_net_sysctl_table(t->sysctl_header);
2605 		kfree(t);
2606 	}
2607 
2608 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2609 }
2610 
2611 static int devinet_sysctl_register(struct in_device *idev)
2612 {
2613 	int err;
2614 
2615 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2616 		return -EINVAL;
2617 
2618 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2619 	if (err)
2620 		return err;
2621 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2622 					idev->dev->ifindex, &idev->cnf);
2623 	if (err)
2624 		neigh_sysctl_unregister(idev->arp_parms);
2625 	return err;
2626 }
2627 
2628 static void devinet_sysctl_unregister(struct in_device *idev)
2629 {
2630 	struct net *net = dev_net(idev->dev);
2631 
2632 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2633 	neigh_sysctl_unregister(idev->arp_parms);
2634 }
2635 
/*
 * Template for the "ip_forward" sysctl, which devinet_init_net()
 * registers under "net/ipv4". It operates on the FORWARDING slot and
 * shares its handler with the per-block "forwarding" entries.
 * devinet_init_net() duplicates this table and repoints .data, .extra1
 * and .extra2 at the namespace being initialised.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2649 #endif
2650 
/*
 * Per-namespace init: allocate the "all" and "default" configuration
 * blocks for @net, seed them according to
 * sysctl_devconf_inherit_init_net, and (with CONFIG_SYSCTL) register
 * their sysctl directories plus the "ip_forward" entry.
 *
 * Returns 0 on success or a negative errno; on failure everything set
 * up so far is undone in reverse order.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;
	struct ctl_table_header *forw_hdr;
#endif

	/* Covers all three allocation failures below. */
	err = -ENOMEM;
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	/* Point the ip_forward entry at this namespace's "all" block. */
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	/* init_net itself always keeps the compiled-in values copied above. */
	if (!net_eq(net, &init_net)) {
		if (IS_ENABLED(CONFIG_SYSCTL) &&
		    sysctl_devconf_inherit_init_net == 3) {
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
		} else if (!IS_ENABLED(CONFIG_SYSCTL) ||
			   sysctl_devconf_inherit_init_net != 2) {
			/* inherit == 0 or 1: copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
		}
		/* else inherit == 2: use compiled values */
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	/* err was overwritten by the calls above; reset it for the next check. */
	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	/* Publish the blocks only once everything has succeeded. */
	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Error unwinding: each label undoes the step before the one that failed. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	return err;
}
2735 
/*
 * Per-namespace exit: unregister the sysctl entries created by
 * devinet_init_net() (with CONFIG_SYSCTL) and free the "default" and
 * "all" configuration blocks of @net.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Fetch the table pointer from the header before unregistering it. */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2752 
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2757 
/* AF_INET link-attribute callbacks, registered from devinet_init(). */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2765 
/*
 * Boot-time initialisation of the IPv4 device layer: set up the address
 * hash heads, register the per-namespace operations and the netdevice
 * notifier, queue check_lifetime_work, and register the AF_INET
 * rtnetlink handlers.
 */
void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Queue check_lifetime_work with a delay of 0. */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	/* Address add/delete/dump and netconf get/dump message handlers. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, 0);
}
2786