xref: /openbmc/linux/net/ipv4/devinet.c (revision bbb774d9)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU or RTNL.
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fall back to the FIB local table so that communication
166 		 * over loopback subnets works.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
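/*
 * Illustrative sketch (not part of the original file): a caller that only
 * needs the device briefly can pass devref=false, but must then stay inside
 * its RCU (or RTNL) critical section while using the result; with
 * devref=true the reference must later be dropped with dev_put().
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		ifindex = dev->ifindex;
 *	rcu_read_unlock();
 */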
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 void in_dev_finish_destroy(struct in_device *idev)
238 {
239 	struct net_device *dev = idev->dev;
240 
241 	WARN_ON(idev->ifa_list);
242 	WARN_ON(idev->mc_list);
243 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
244 #ifdef NET_REFCNT_DEBUG
245 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
246 #endif
247 	netdev_put(dev, &idev->dev_tracker);
248 	if (!idev->dead)
249 		pr_err("Freeing alive in_device %p\n", idev);
250 	else
251 		kfree(idev);
252 }
253 EXPORT_SYMBOL(in_dev_finish_destroy);
254 
255 static struct in_device *inetdev_init(struct net_device *dev)
256 {
257 	struct in_device *in_dev;
258 	int err = -ENOMEM;
259 
260 	ASSERT_RTNL();
261 
262 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
263 	if (!in_dev)
264 		goto out;
265 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
266 			sizeof(in_dev->cnf));
267 	in_dev->cnf.sysctl = NULL;
268 	in_dev->dev = dev;
269 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
270 	if (!in_dev->arp_parms)
271 		goto out_kfree;
272 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
273 		dev_disable_lro(dev);
274 	/* Reference in_dev->dev */
275 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
276 	/* Account for reference dev->ip_ptr (below) */
277 	refcount_set(&in_dev->refcnt, 1);
278 
279 	err = devinet_sysctl_register(in_dev);
280 	if (err) {
281 		in_dev->dead = 1;
282 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
283 		in_dev_put(in_dev);
284 		in_dev = NULL;
285 		goto out;
286 	}
287 	ip_mc_init_dev(in_dev);
288 	if (dev->flags & IFF_UP)
289 		ip_mc_up(in_dev);
290 
291 	/* we can receive as soon as ip_ptr is set -- do this last */
292 	rcu_assign_pointer(dev->ip_ptr, in_dev);
293 out:
294 	return in_dev ?: ERR_PTR(err);
295 out_kfree:
296 	kfree(in_dev);
297 	in_dev = NULL;
298 	goto out;
299 }
300 
301 static void in_dev_rcu_put(struct rcu_head *head)
302 {
303 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
304 	in_dev_put(idev);
305 }
306 
307 static void inetdev_destroy(struct in_device *in_dev)
308 {
309 	struct net_device *dev;
310 	struct in_ifaddr *ifa;
311 
312 	ASSERT_RTNL();
313 
314 	dev = in_dev->dev;
315 
316 	in_dev->dead = 1;
317 
318 	ip_mc_destroy_dev(in_dev);
319 
320 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
321 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
322 		inet_free_ifa(ifa);
323 	}
324 
325 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
326 
327 	devinet_sysctl_unregister(in_dev);
328 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
329 	arp_ifdown(dev);
330 
331 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
332 }
333 
334 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
335 {
336 	const struct in_ifaddr *ifa;
337 
338 	rcu_read_lock();
339 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
340 		if (inet_ifa_match(a, ifa)) {
341 			if (!b || inet_ifa_match(b, ifa)) {
342 				rcu_read_unlock();
343 				return 1;
344 			}
345 		}
346 	}
347 	rcu_read_unlock();
348 	return 0;
349 }
350 
351 static void __inet_del_ifa(struct in_device *in_dev,
352 			   struct in_ifaddr __rcu **ifap,
353 			   int destroy, struct nlmsghdr *nlh, u32 portid)
354 {
355 	struct in_ifaddr *promote = NULL;
356 	struct in_ifaddr *ifa, *ifa1;
357 	struct in_ifaddr *last_prim;
358 	struct in_ifaddr *prev_prom = NULL;
359 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
360 
361 	ASSERT_RTNL();
362 
363 	ifa1 = rtnl_dereference(*ifap);
364 	last_prim = rtnl_dereference(in_dev->ifa_list);
365 	if (in_dev->dead)
366 		goto no_promotions;
367 
368 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
369 	 * unless alias promotion is set.
370 	 */
371 
372 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
373 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
374 
375 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
376 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
377 			    ifa1->ifa_scope <= ifa->ifa_scope)
378 				last_prim = ifa;
379 
380 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
381 			    ifa1->ifa_mask != ifa->ifa_mask ||
382 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
383 				ifap1 = &ifa->ifa_next;
384 				prev_prom = ifa;
385 				continue;
386 			}
387 
388 			if (!do_promote) {
389 				inet_hash_remove(ifa);
390 				*ifap1 = ifa->ifa_next;
391 
392 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
393 				blocking_notifier_call_chain(&inetaddr_chain,
394 						NETDEV_DOWN, ifa);
395 				inet_free_ifa(ifa);
396 			} else {
397 				promote = ifa;
398 				break;
399 			}
400 		}
401 	}
402 
403 	/* On promotion all secondaries from the subnet are changing
404 	 * the primary IP; we must remove all their routes silently
405 	 * and later add them back with the new prefsrc. Do this
406 	 * while all addresses are still on the device list.
407 	 */
408 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
409 		if (ifa1->ifa_mask == ifa->ifa_mask &&
410 		    inet_ifa_match(ifa1->ifa_address, ifa))
411 			fib_del_ifaddr(ifa, ifa1);
412 	}
413 
414 no_promotions:
415 	/* 2. Unlink it */
416 
417 	*ifap = ifa1->ifa_next;
418 	inet_hash_remove(ifa1);
419 
420 	/* 3. Announce address deletion */
421 
422 	/* Send the message first, then call the notifier.
423 	   At first sight, the FIB update triggered by the notifier
424 	   will refer to an already deleted ifaddr, which could confuse
425 	   netlink listeners. It is not true: look, gated sees
426 	   that the route was deleted and, if it still thinks the ifaddr
427 	   is valid, it will try to restore the deleted routes... Grr.
428 	   So this order is correct.
429 	 */
430 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
431 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
432 
433 	if (promote) {
434 		struct in_ifaddr *next_sec;
435 
436 		next_sec = rtnl_dereference(promote->ifa_next);
437 		if (prev_prom) {
438 			struct in_ifaddr *last_sec;
439 
440 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
441 
442 			last_sec = rtnl_dereference(last_prim->ifa_next);
443 			rcu_assign_pointer(promote->ifa_next, last_sec);
444 			rcu_assign_pointer(last_prim->ifa_next, promote);
445 		}
446 
447 		promote->ifa_flags &= ~IFA_F_SECONDARY;
448 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
449 		blocking_notifier_call_chain(&inetaddr_chain,
450 				NETDEV_UP, promote);
451 		for (ifa = next_sec; ifa;
452 		     ifa = rtnl_dereference(ifa->ifa_next)) {
453 			if (ifa1->ifa_mask != ifa->ifa_mask ||
454 			    !inet_ifa_match(ifa1->ifa_address, ifa))
455 					continue;
456 			fib_add_ifaddr(ifa);
457 		}
458 
459 	}
460 	if (destroy)
461 		inet_free_ifa(ifa1);
462 }
463 
464 static void inet_del_ifa(struct in_device *in_dev,
465 			 struct in_ifaddr __rcu **ifap,
466 			 int destroy)
467 {
468 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
469 }
470 
471 static void check_lifetime(struct work_struct *work);
472 
473 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
474 
475 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
476 			     u32 portid, struct netlink_ext_ack *extack)
477 {
478 	struct in_ifaddr __rcu **last_primary, **ifap;
479 	struct in_device *in_dev = ifa->ifa_dev;
480 	struct in_validator_info ivi;
481 	struct in_ifaddr *ifa1;
482 	int ret;
483 
484 	ASSERT_RTNL();
485 
486 	if (!ifa->ifa_local) {
487 		inet_free_ifa(ifa);
488 		return 0;
489 	}
490 
491 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
492 	last_primary = &in_dev->ifa_list;
493 
494 	/* Don't set IPv6-only flags on IPv4 addresses */
495 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
496 
497 	ifap = &in_dev->ifa_list;
498 	ifa1 = rtnl_dereference(*ifap);
499 
500 	while (ifa1) {
501 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
502 		    ifa->ifa_scope <= ifa1->ifa_scope)
503 			last_primary = &ifa1->ifa_next;
504 		if (ifa1->ifa_mask == ifa->ifa_mask &&
505 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
506 			if (ifa1->ifa_local == ifa->ifa_local) {
507 				inet_free_ifa(ifa);
508 				return -EEXIST;
509 			}
510 			if (ifa1->ifa_scope != ifa->ifa_scope) {
511 				inet_free_ifa(ifa);
512 				return -EINVAL;
513 			}
514 			ifa->ifa_flags |= IFA_F_SECONDARY;
515 		}
516 
517 		ifap = &ifa1->ifa_next;
518 		ifa1 = rtnl_dereference(*ifap);
519 	}
520 
521 	/* Allow any devices that wish to register ifaddr validators to weigh
522 	 * in now, before changes are committed.  The rtnl lock is serializing
523 	 * access here, so the state should not change between a validator call
524 	 * and a final notify on commit.  This isn't invoked on promotion under
525 	 * the assumption that validators are checking the address itself, and
526 	 * not the flags.
527 	 */
528 	ivi.ivi_addr = ifa->ifa_address;
529 	ivi.ivi_dev = ifa->ifa_dev;
530 	ivi.extack = extack;
531 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
532 					   NETDEV_UP, &ivi);
533 	ret = notifier_to_errno(ret);
534 	if (ret) {
535 		inet_free_ifa(ifa);
536 		return ret;
537 	}
538 
539 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
540 		ifap = last_primary;
541 
542 	rcu_assign_pointer(ifa->ifa_next, *ifap);
543 	rcu_assign_pointer(*ifap, ifa);
544 
545 	inet_hash_insert(dev_net(in_dev->dev), ifa);
546 
547 	cancel_delayed_work(&check_lifetime_work);
548 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
549 
550 	/* Send the message first, then call the notifier.
551 	   The notifier will trigger a FIB update, so that
552 	   netlink listeners will know about the new ifaddr */
553 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
554 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
555 
556 	return 0;
557 }
558 
559 static int inet_insert_ifa(struct in_ifaddr *ifa)
560 {
561 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
562 }
563 
564 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
565 {
566 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
567 
568 	ASSERT_RTNL();
569 
570 	if (!in_dev) {
571 		inet_free_ifa(ifa);
572 		return -ENOBUFS;
573 	}
574 	ipv4_devconf_setall(in_dev);
575 	neigh_parms_data_state_setall(in_dev->arp_parms);
576 	if (ifa->ifa_dev != in_dev) {
577 		WARN_ON(ifa->ifa_dev);
578 		in_dev_hold(in_dev);
579 		ifa->ifa_dev = in_dev;
580 	}
581 	if (ipv4_is_loopback(ifa->ifa_local))
582 		ifa->ifa_scope = RT_SCOPE_HOST;
583 	return inet_insert_ifa(ifa);
584 }
585 
586 /* Caller must hold RCU or RTNL:
587  * We don't take a reference on the found in_device
588  */
589 struct in_device *inetdev_by_index(struct net *net, int ifindex)
590 {
591 	struct net_device *dev;
592 	struct in_device *in_dev = NULL;
593 
594 	rcu_read_lock();
595 	dev = dev_get_by_index_rcu(net, ifindex);
596 	if (dev)
597 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
598 	rcu_read_unlock();
599 	return in_dev;
600 }
601 EXPORT_SYMBOL(inetdev_by_index);
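/*
 * Illustrative sketch (not part of the original file): because no reference
 * is taken, the returned in_device may only be used while the caller still
 * holds RCU or RTNL, e.g.:
 *
 *	rcu_read_lock();
 *	in_dev = inetdev_by_index(net, ifindex);
 *	if (in_dev)
 *		forwarding = IN_DEV_FORWARD(in_dev);
 *	rcu_read_unlock();
 */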
602 
603 /* Called only under the RTNL semaphore. No other locks taken. */
604 
605 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
606 				    __be32 mask)
607 {
608 	struct in_ifaddr *ifa;
609 
610 	ASSERT_RTNL();
611 
612 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
613 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
614 			return ifa;
615 	}
616 	return NULL;
617 }
618 
619 static int ip_mc_autojoin_config(struct net *net, bool join,
620 				 const struct in_ifaddr *ifa)
621 {
622 #if defined(CONFIG_IP_MULTICAST)
623 	struct ip_mreqn mreq = {
624 		.imr_multiaddr.s_addr = ifa->ifa_address,
625 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
626 	};
627 	struct sock *sk = net->ipv4.mc_autojoin_sk;
628 	int ret;
629 
630 	ASSERT_RTNL();
631 
632 	lock_sock(sk);
633 	if (join)
634 		ret = ip_mc_join_group(sk, &mreq);
635 	else
636 		ret = ip_mc_leave_group(sk, &mreq);
637 	release_sock(sk);
638 
639 	return ret;
640 #else
641 	return -EOPNOTSUPP;
642 #endif
643 }
644 
645 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
646 			    struct netlink_ext_ack *extack)
647 {
648 	struct net *net = sock_net(skb->sk);
649 	struct in_ifaddr __rcu **ifap;
650 	struct nlattr *tb[IFA_MAX+1];
651 	struct in_device *in_dev;
652 	struct ifaddrmsg *ifm;
653 	struct in_ifaddr *ifa;
654 	int err;
655 
656 	ASSERT_RTNL();
657 
658 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
659 				     ifa_ipv4_policy, extack);
660 	if (err < 0)
661 		goto errout;
662 
663 	ifm = nlmsg_data(nlh);
664 	in_dev = inetdev_by_index(net, ifm->ifa_index);
665 	if (!in_dev) {
666 		err = -ENODEV;
667 		goto errout;
668 	}
669 
670 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
671 	     ifap = &ifa->ifa_next) {
672 		if (tb[IFA_LOCAL] &&
673 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
674 			continue;
675 
676 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
677 			continue;
678 
679 		if (tb[IFA_ADDRESS] &&
680 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
681 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
682 			continue;
683 
684 		if (ipv4_is_multicast(ifa->ifa_address))
685 			ip_mc_autojoin_config(net, false, ifa);
686 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
687 		return 0;
688 	}
689 
690 	err = -EADDRNOTAVAIL;
691 errout:
692 	return err;
693 }
694 
695 #define INFINITY_LIFE_TIME	0xFFFFFFFF
696 
697 static void check_lifetime(struct work_struct *work)
698 {
699 	unsigned long now, next, next_sec, next_sched;
700 	struct in_ifaddr *ifa;
701 	struct hlist_node *n;
702 	int i;
703 
704 	now = jiffies;
705 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
706 
707 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
708 		bool change_needed = false;
709 
710 		rcu_read_lock();
711 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
712 			unsigned long age;
713 
714 			if (ifa->ifa_flags & IFA_F_PERMANENT)
715 				continue;
716 
717 			/* We try to batch several events at once. */
718 			age = (now - ifa->ifa_tstamp +
719 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
720 
721 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722 			    age >= ifa->ifa_valid_lft) {
723 				change_needed = true;
724 			} else if (ifa->ifa_preferred_lft ==
725 				   INFINITY_LIFE_TIME) {
726 				continue;
727 			} else if (age >= ifa->ifa_preferred_lft) {
728 				if (time_before(ifa->ifa_tstamp +
729 						ifa->ifa_valid_lft * HZ, next))
730 					next = ifa->ifa_tstamp +
731 					       ifa->ifa_valid_lft * HZ;
732 
733 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
734 					change_needed = true;
735 			} else if (time_before(ifa->ifa_tstamp +
736 					       ifa->ifa_preferred_lft * HZ,
737 					       next)) {
738 				next = ifa->ifa_tstamp +
739 				       ifa->ifa_preferred_lft * HZ;
740 			}
741 		}
742 		rcu_read_unlock();
743 		if (!change_needed)
744 			continue;
745 		rtnl_lock();
746 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
747 			unsigned long age;
748 
749 			if (ifa->ifa_flags & IFA_F_PERMANENT)
750 				continue;
751 
752 			/* We try to batch several events at once. */
753 			age = (now - ifa->ifa_tstamp +
754 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
755 
756 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
757 			    age >= ifa->ifa_valid_lft) {
758 				struct in_ifaddr __rcu **ifap;
759 				struct in_ifaddr *tmp;
760 
761 				ifap = &ifa->ifa_dev->ifa_list;
762 				tmp = rtnl_dereference(*ifap);
763 				while (tmp) {
764 					if (tmp == ifa) {
765 						inet_del_ifa(ifa->ifa_dev,
766 							     ifap, 1);
767 						break;
768 					}
769 					ifap = &tmp->ifa_next;
770 					tmp = rtnl_dereference(*ifap);
771 				}
772 			} else if (ifa->ifa_preferred_lft !=
773 				   INFINITY_LIFE_TIME &&
774 				   age >= ifa->ifa_preferred_lft &&
775 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
776 				ifa->ifa_flags |= IFA_F_DEPRECATED;
777 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
778 			}
779 		}
780 		rtnl_unlock();
781 	}
782 
783 	next_sec = round_jiffies_up(next);
784 	next_sched = next;
785 
786 	/* If rounded timeout is accurate enough, accept it. */
787 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
788 		next_sched = next_sec;
789 
790 	now = jiffies;
791 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
792 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
793 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
794 
795 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
796 			next_sched - now);
797 }
798 
799 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
800 			     __u32 prefered_lft)
801 {
802 	unsigned long timeout;
803 
804 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
805 
806 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
807 	if (addrconf_finite_timeout(timeout))
808 		ifa->ifa_valid_lft = timeout;
809 	else
810 		ifa->ifa_flags |= IFA_F_PERMANENT;
811 
812 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
813 	if (addrconf_finite_timeout(timeout)) {
814 		if (timeout == 0)
815 			ifa->ifa_flags |= IFA_F_DEPRECATED;
816 		ifa->ifa_preferred_lft = timeout;
817 	}
818 	ifa->ifa_tstamp = jiffies;
819 	if (!ifa->ifa_cstamp)
820 		ifa->ifa_cstamp = ifa->ifa_tstamp;
821 }
822 
823 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
824 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
825 				       struct netlink_ext_ack *extack)
826 {
827 	struct nlattr *tb[IFA_MAX+1];
828 	struct in_ifaddr *ifa;
829 	struct ifaddrmsg *ifm;
830 	struct net_device *dev;
831 	struct in_device *in_dev;
832 	int err;
833 
834 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
835 				     ifa_ipv4_policy, extack);
836 	if (err < 0)
837 		goto errout;
838 
839 	ifm = nlmsg_data(nlh);
840 	err = -EINVAL;
841 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
842 		goto errout;
843 
844 	dev = __dev_get_by_index(net, ifm->ifa_index);
845 	err = -ENODEV;
846 	if (!dev)
847 		goto errout;
848 
849 	in_dev = __in_dev_get_rtnl(dev);
850 	err = -ENOBUFS;
851 	if (!in_dev)
852 		goto errout;
853 
854 	ifa = inet_alloc_ifa();
855 	if (!ifa)
856 		/*
857 		 * A potential indev allocation can be left alive; it stays
858 		 * assigned to its device and is destroyed with it.
859 		 */
860 		goto errout;
861 
862 	ipv4_devconf_setall(in_dev);
863 	neigh_parms_data_state_setall(in_dev->arp_parms);
864 	in_dev_hold(in_dev);
865 
866 	if (!tb[IFA_ADDRESS])
867 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
868 
869 	INIT_HLIST_NODE(&ifa->hash);
870 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
871 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
872 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
873 					 ifm->ifa_flags;
874 	ifa->ifa_scope = ifm->ifa_scope;
875 	ifa->ifa_dev = in_dev;
876 
877 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
878 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
879 
880 	if (tb[IFA_BROADCAST])
881 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
882 
883 	if (tb[IFA_LABEL])
884 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
885 	else
886 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
887 
888 	if (tb[IFA_RT_PRIORITY])
889 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
890 
891 	if (tb[IFA_PROTO])
892 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
893 
894 	if (tb[IFA_CACHEINFO]) {
895 		struct ifa_cacheinfo *ci;
896 
897 		ci = nla_data(tb[IFA_CACHEINFO]);
898 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
899 			err = -EINVAL;
900 			goto errout_free;
901 		}
902 		*pvalid_lft = ci->ifa_valid;
903 		*pprefered_lft = ci->ifa_prefered;
904 	}
905 
906 	return ifa;
907 
908 errout_free:
909 	inet_free_ifa(ifa);
910 errout:
911 	return ERR_PTR(err);
912 }
913 
914 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
915 {
916 	struct in_device *in_dev = ifa->ifa_dev;
917 	struct in_ifaddr *ifa1;
918 
919 	if (!ifa->ifa_local)
920 		return NULL;
921 
922 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
923 		if (ifa1->ifa_mask == ifa->ifa_mask &&
924 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
925 		    ifa1->ifa_local == ifa->ifa_local)
926 			return ifa1;
927 	}
928 	return NULL;
929 }
930 
931 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
932 			    struct netlink_ext_ack *extack)
933 {
934 	struct net *net = sock_net(skb->sk);
935 	struct in_ifaddr *ifa;
936 	struct in_ifaddr *ifa_existing;
937 	__u32 valid_lft = INFINITY_LIFE_TIME;
938 	__u32 prefered_lft = INFINITY_LIFE_TIME;
939 
940 	ASSERT_RTNL();
941 
942 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
943 	if (IS_ERR(ifa))
944 		return PTR_ERR(ifa);
945 
946 	ifa_existing = find_matching_ifa(ifa);
947 	if (!ifa_existing) {
948 		/* It would be best to check for !NLM_F_CREATE here but
949 		 * userspace already relies on not having to provide this.
950 		 */
951 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
952 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
953 			int ret = ip_mc_autojoin_config(net, true, ifa);
954 
955 			if (ret < 0) {
956 				inet_free_ifa(ifa);
957 				return ret;
958 			}
959 		}
960 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
961 					 extack);
962 	} else {
963 		u32 new_metric = ifa->ifa_rt_priority;
964 
965 		inet_free_ifa(ifa);
966 
967 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
968 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
969 			return -EEXIST;
970 		ifa = ifa_existing;
971 
972 		if (ifa->ifa_rt_priority != new_metric) {
973 			fib_modify_prefix_metric(ifa, new_metric);
974 			ifa->ifa_rt_priority = new_metric;
975 		}
976 
977 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
978 		cancel_delayed_work(&check_lifetime_work);
979 		queue_delayed_work(system_power_efficient_wq,
980 				&check_lifetime_work, 0);
981 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
982 	}
983 	return 0;
984 }
985 
986 /*
987  *	Determine a default network mask, based on the IP address.
988  */
989 
990 static int inet_abc_len(__be32 addr)
991 {
992 	int rc = -1;	/* Something else, probably a multicast. */
993 
994 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
995 		rc = 0;
996 	else {
997 		__u32 haddr = ntohl(addr);
998 		if (IN_CLASSA(haddr))
999 			rc = 8;
1000 		else if (IN_CLASSB(haddr))
1001 			rc = 16;
1002 		else if (IN_CLASSC(haddr))
1003 			rc = 24;
1004 		else if (IN_CLASSE(haddr))
1005 			rc = 32;
1006 	}
1007 
1008 	return rc;
1009 }
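/*
 * Worked examples of the classful defaults above (informational only):
 * 10.0.0.1 is class A -> 8, 172.16.0.1 is class B -> 16, 192.168.1.1 is
 * class C -> 24, 240.0.0.1 is class E -> 32, 224.0.0.1 (multicast) -> -1,
 * and 0.0.0.0 or 255.255.255.255 -> 0.
 */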
1010 
1011 
1012 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1013 {
1014 	struct sockaddr_in sin_orig;
1015 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1016 	struct in_ifaddr __rcu **ifap = NULL;
1017 	struct in_device *in_dev;
1018 	struct in_ifaddr *ifa = NULL;
1019 	struct net_device *dev;
1020 	char *colon;
1021 	int ret = -EFAULT;
1022 	int tryaddrmatch = 0;
1023 
1024 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1025 
1026 	/* save original address for comparison */
1027 	memcpy(&sin_orig, sin, sizeof(*sin));
1028 
1029 	colon = strchr(ifr->ifr_name, ':');
1030 	if (colon)
1031 		*colon = 0;
1032 
1033 	dev_load(net, ifr->ifr_name);
1034 
1035 	switch (cmd) {
1036 	case SIOCGIFADDR:	/* Get interface address */
1037 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1038 	case SIOCGIFDSTADDR:	/* Get the destination address */
1039 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1040 		/* Note that these ioctls will not sleep,
1041 		   so that we do not impose a lock.
1042 		   One day we will be forced to put shlock here (I mean SMP)
1043 		 */
1044 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1045 		memset(sin, 0, sizeof(*sin));
1046 		sin->sin_family = AF_INET;
1047 		break;
1048 
1049 	case SIOCSIFFLAGS:
1050 		ret = -EPERM;
1051 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1052 			goto out;
1053 		break;
1054 	case SIOCSIFADDR:	/* Set interface address (and family) */
1055 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1056 	case SIOCSIFDSTADDR:	/* Set the destination address */
1057 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1058 		ret = -EPERM;
1059 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1060 			goto out;
1061 		ret = -EINVAL;
1062 		if (sin->sin_family != AF_INET)
1063 			goto out;
1064 		break;
1065 	default:
1066 		ret = -EINVAL;
1067 		goto out;
1068 	}
1069 
1070 	rtnl_lock();
1071 
1072 	ret = -ENODEV;
1073 	dev = __dev_get_by_name(net, ifr->ifr_name);
1074 	if (!dev)
1075 		goto done;
1076 
1077 	if (colon)
1078 		*colon = ':';
1079 
1080 	in_dev = __in_dev_get_rtnl(dev);
1081 	if (in_dev) {
1082 		if (tryaddrmatch) {
1083 			/* Matthias Andree */
1084 			/* compare label and address (4.4BSD style) */
1085 			/* note: we only do this for a limited set of ioctls
1086 			   and only if the original address family was AF_INET.
1087 			   This is checked above. */
1088 
1089 			for (ifap = &in_dev->ifa_list;
1090 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1091 			     ifap = &ifa->ifa_next) {
1092 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1093 				    sin_orig.sin_addr.s_addr ==
1094 							ifa->ifa_local) {
1095 					break; /* found */
1096 				}
1097 			}
1098 		}
1099 		/* we didn't get a match, maybe the application is
1100 		   4.3BSD-style and passed in junk so we fall back to
1101 		   comparing just the label */
1102 		if (!ifa) {
1103 			for (ifap = &in_dev->ifa_list;
1104 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1105 			     ifap = &ifa->ifa_next)
1106 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1107 					break;
1108 		}
1109 	}
1110 
1111 	ret = -EADDRNOTAVAIL;
1112 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1113 		goto done;
1114 
1115 	switch (cmd) {
1116 	case SIOCGIFADDR:	/* Get interface address */
1117 		ret = 0;
1118 		sin->sin_addr.s_addr = ifa->ifa_local;
1119 		break;
1120 
1121 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1122 		ret = 0;
1123 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1124 		break;
1125 
1126 	case SIOCGIFDSTADDR:	/* Get the destination address */
1127 		ret = 0;
1128 		sin->sin_addr.s_addr = ifa->ifa_address;
1129 		break;
1130 
1131 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1132 		ret = 0;
1133 		sin->sin_addr.s_addr = ifa->ifa_mask;
1134 		break;
1135 
1136 	case SIOCSIFFLAGS:
1137 		if (colon) {
1138 			ret = -EADDRNOTAVAIL;
1139 			if (!ifa)
1140 				break;
1141 			ret = 0;
1142 			if (!(ifr->ifr_flags & IFF_UP))
1143 				inet_del_ifa(in_dev, ifap, 1);
1144 			break;
1145 		}
1146 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1147 		break;
1148 
1149 	case SIOCSIFADDR:	/* Set interface address (and family) */
1150 		ret = -EINVAL;
1151 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1152 			break;
1153 
1154 		if (!ifa) {
1155 			ret = -ENOBUFS;
1156 			ifa = inet_alloc_ifa();
1157 			if (!ifa)
1158 				break;
1159 			INIT_HLIST_NODE(&ifa->hash);
1160 			if (colon)
1161 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1162 			else
1163 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1164 		} else {
1165 			ret = 0;
1166 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1167 				break;
1168 			inet_del_ifa(in_dev, ifap, 0);
1169 			ifa->ifa_broadcast = 0;
1170 			ifa->ifa_scope = 0;
1171 		}
1172 
1173 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1174 
1175 		if (!(dev->flags & IFF_POINTOPOINT)) {
1176 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1177 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1178 			if ((dev->flags & IFF_BROADCAST) &&
1179 			    ifa->ifa_prefixlen < 31)
1180 				ifa->ifa_broadcast = ifa->ifa_address |
1181 						     ~ifa->ifa_mask;
1182 		} else {
1183 			ifa->ifa_prefixlen = 32;
1184 			ifa->ifa_mask = inet_make_mask(32);
1185 		}
1186 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1187 		ret = inet_set_ifa(dev, ifa);
1188 		break;
1189 
1190 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1191 		ret = 0;
1192 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1193 			inet_del_ifa(in_dev, ifap, 0);
1194 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1195 			inet_insert_ifa(ifa);
1196 		}
1197 		break;
1198 
1199 	case SIOCSIFDSTADDR:	/* Set the destination address */
1200 		ret = 0;
1201 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1202 			break;
1203 		ret = -EINVAL;
1204 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1205 			break;
1206 		ret = 0;
1207 		inet_del_ifa(in_dev, ifap, 0);
1208 		ifa->ifa_address = sin->sin_addr.s_addr;
1209 		inet_insert_ifa(ifa);
1210 		break;
1211 
1212 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1213 
1214 		/*
1215 		 *	The mask we set must be legal.
1216 		 */
1217 		ret = -EINVAL;
1218 		if (bad_mask(sin->sin_addr.s_addr, 0))
1219 			break;
1220 		ret = 0;
1221 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1222 			__be32 old_mask = ifa->ifa_mask;
1223 			inet_del_ifa(in_dev, ifap, 0);
1224 			ifa->ifa_mask = sin->sin_addr.s_addr;
1225 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1226 
1227 			/* If the current broadcast address matches
1228 			 * the old netmask, recalculate the broadcast
1229 			 * address. Otherwise it's a funny address,
1230 			 * so don't touch it since the user seems
1231 			 * to know what (s)he's doing...
1232 			 */
1233 			if ((dev->flags & IFF_BROADCAST) &&
1234 			    (ifa->ifa_prefixlen < 31) &&
1235 			    (ifa->ifa_broadcast ==
1236 			     (ifa->ifa_local|~old_mask))) {
1237 				ifa->ifa_broadcast = (ifa->ifa_local |
1238 						      ~sin->sin_addr.s_addr);
1239 			}
1240 			inet_insert_ifa(ifa);
1241 		}
1242 		break;
1243 	}
1244 done:
1245 	rtnl_unlock();
1246 out:
1247 	return ret;
1248 }
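/*
 * Illustrative user-space counterpart (example only, not kernel code): these
 * handlers are normally reached through an ioctl on an AF_INET socket.  The
 * interface name "eth0" is just a placeholder.
 *
 *	struct ifreq ifr = { };
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFADDR, &ifr) == 0)
 *		addr = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr;
 */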
1249 
1250 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1251 {
1252 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1253 	const struct in_ifaddr *ifa;
1254 	struct ifreq ifr;
1255 	int done = 0;
1256 
1257 	if (WARN_ON(size > sizeof(struct ifreq)))
1258 		goto out;
1259 
1260 	if (!in_dev)
1261 		goto out;
1262 
1263 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1264 		if (!buf) {
1265 			done += size;
1266 			continue;
1267 		}
1268 		if (len < size)
1269 			break;
1270 		memset(&ifr, 0, sizeof(struct ifreq));
1271 		strcpy(ifr.ifr_name, ifa->ifa_label);
1272 
1273 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1274 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1275 								ifa->ifa_local;
1276 
1277 		if (copy_to_user(buf + done, &ifr, size)) {
1278 			done = -EFAULT;
1279 			break;
1280 		}
1281 		len  -= size;
1282 		done += size;
1283 	}
1284 out:
1285 	return done;
1286 }
1287 
1288 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1289 				 int scope)
1290 {
1291 	const struct in_ifaddr *ifa;
1292 
1293 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1294 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1295 			continue;
1296 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1297 		    ifa->ifa_scope <= scope)
1298 			return ifa->ifa_local;
1299 	}
1300 
1301 	return 0;
1302 }
1303 
1304 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1305 {
1306 	const struct in_ifaddr *ifa;
1307 	__be32 addr = 0;
1308 	unsigned char localnet_scope = RT_SCOPE_HOST;
1309 	struct in_device *in_dev;
1310 	struct net *net = dev_net(dev);
1311 	int master_idx;
1312 
1313 	rcu_read_lock();
1314 	in_dev = __in_dev_get_rcu(dev);
1315 	if (!in_dev)
1316 		goto no_in_dev;
1317 
1318 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1319 		localnet_scope = RT_SCOPE_LINK;
1320 
1321 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1322 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1323 			continue;
1324 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1325 			continue;
1326 		if (!dst || inet_ifa_match(dst, ifa)) {
1327 			addr = ifa->ifa_local;
1328 			break;
1329 		}
1330 		if (!addr)
1331 			addr = ifa->ifa_local;
1332 	}
1333 
1334 	if (addr)
1335 		goto out_unlock;
1336 no_in_dev:
1337 	master_idx = l3mdev_master_ifindex_rcu(dev);
1338 
1339 	/* For VRFs, the VRF device takes the place of the loopback device,
1340 	 * with addresses on it being preferred.  Note in such cases the
1341 	 * loopback device will be among the devices that fail the master_idx
1342 	 * equality check in the loop below.
1343 	 */
1344 	if (master_idx &&
1345 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1346 	    (in_dev = __in_dev_get_rcu(dev))) {
1347 		addr = in_dev_select_addr(in_dev, scope);
1348 		if (addr)
1349 			goto out_unlock;
1350 	}
1351 
1352 	/* Non-loopback addresses on the loopback device should be preferred
1353 	   in this case. It is important that lo is the first interface
1354 	   in the dev_base list.
1355 	 */
1356 	for_each_netdev_rcu(net, dev) {
1357 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1358 			continue;
1359 
1360 		in_dev = __in_dev_get_rcu(dev);
1361 		if (!in_dev)
1362 			continue;
1363 
1364 		addr = in_dev_select_addr(in_dev, scope);
1365 		if (addr)
1366 			goto out_unlock;
1367 	}
1368 out_unlock:
1369 	rcu_read_unlock();
1370 	return addr;
1371 }
1372 EXPORT_SYMBOL(inet_select_addr);
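/*
 * Illustrative sketch (not part of the original file): pick a usable IPv4
 * source address on "dev" for reaching "dst", allowing at most global scope:
 *
 *	saddr = inet_select_addr(dev, dst, RT_SCOPE_UNIVERSE);
 *	if (!saddr)
 *		return -EADDRNOTAVAIL;
 */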
1373 
1374 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1375 			      __be32 local, int scope)
1376 {
1377 	unsigned char localnet_scope = RT_SCOPE_HOST;
1378 	const struct in_ifaddr *ifa;
1379 	__be32 addr = 0;
1380 	int same = 0;
1381 
1382 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1383 		localnet_scope = RT_SCOPE_LINK;
1384 
1385 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1386 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1387 
1388 		if (!addr &&
1389 		    (local == ifa->ifa_local || !local) &&
1390 		    min_scope <= scope) {
1391 			addr = ifa->ifa_local;
1392 			if (same)
1393 				break;
1394 		}
1395 		if (!same) {
1396 			same = (!local || inet_ifa_match(local, ifa)) &&
1397 				(!dst || inet_ifa_match(dst, ifa));
1398 			if (same && addr) {
1399 				if (local || !dst)
1400 					break;
1401 				/* Is the selected addr in the dst subnet? */
1402 				if (inet_ifa_match(addr, ifa))
1403 					break;
1404 				/* No, then can we use new local src? */
1405 				if (min_scope <= scope) {
1406 					addr = ifa->ifa_local;
1407 					break;
1408 				}
1409 				/* search for large dst subnet for addr */
1410 				same = 0;
1411 			}
1412 		}
1413 	}
1414 
1415 	return same ? addr : 0;
1416 }
1417 
1418 /*
1419  * Confirm that local IP address exists using wildcards:
1420  * - net: netns to check, cannot be NULL
1421  * - in_dev: only on this interface, NULL=any interface
1422  * - dst: only in the same subnet as dst, 0=any dst
1423  * - local: address, 0=autoselect the local address
1424  * - scope: maximum allowed scope value for the local address
1425  */
1426 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1427 			 __be32 dst, __be32 local, int scope)
1428 {
1429 	__be32 addr = 0;
1430 	struct net_device *dev;
1431 
1432 	if (in_dev)
1433 		return confirm_addr_indev(in_dev, dst, local, scope);
1434 
1435 	rcu_read_lock();
1436 	for_each_netdev_rcu(net, dev) {
1437 		in_dev = __in_dev_get_rcu(dev);
1438 		if (in_dev) {
1439 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1440 			if (addr)
1441 				break;
1442 		}
1443 	}
1444 	rcu_read_unlock();
1445 
1446 	return addr;
1447 }
1448 EXPORT_SYMBOL(inet_confirm_addr);
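/*
 * Illustrative sketch (not part of the original file), following the
 * wildcard rules documented above: confirm that "local" is configured on
 * some interface in "net", in the same subnet as "dst", with at most link
 * scope:
 *
 *	found = inet_confirm_addr(net, NULL, dst, local, RT_SCOPE_LINK);
 *	if (!found)
 *		return -EADDRNOTAVAIL;
 */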
1449 
1450 /*
1451  *	Device notifier
1452  */
1453 
1454 int register_inetaddr_notifier(struct notifier_block *nb)
1455 {
1456 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1457 }
1458 EXPORT_SYMBOL(register_inetaddr_notifier);
1459 
1460 int unregister_inetaddr_notifier(struct notifier_block *nb)
1461 {
1462 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1463 }
1464 EXPORT_SYMBOL(unregister_inetaddr_notifier);
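/*
 * Illustrative sketch (not part of the original file): a subsystem reacting
 * to IPv4 address changes registers a notifier block like the one below.
 * The notifier data is the affected in_ifaddr and the event is NETDEV_UP or
 * NETDEV_DOWN, as generated in __inet_insert_ifa() and __inet_del_ifa().
 * "my_inetaddr_event" and "my_nb" are hypothetical names.
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%pI4 added on %s\n", &ifa->ifa_local,
 *				ifa->ifa_dev->dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call	= my_inetaddr_event,
 *	};
 *
 *	register_inetaddr_notifier(&my_nb);
 */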
1465 
1466 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1467 {
1468 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1469 }
1470 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1471 
1472 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1473 {
1474 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1475 	    nb);
1476 }
1477 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
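/*
 * Illustrative sketch (not part of the original file): a validator receives
 * a struct in_validator_info and may veto an address before it is committed;
 * see the NETDEV_UP call on inetaddr_validator_chain in __inet_insert_ifa().
 * "my_validator" and the policy it enforces are hypothetical.
 *
 *	static int my_validator(struct notifier_block *nb,
 *				unsigned long event, void *ptr)
 *	{
 *		struct in_validator_info *ivi = ptr;
 *
 *		if (ipv4_is_loopback(ivi->ivi_addr) &&
 *		    !(ivi->ivi_dev->dev->flags & IFF_LOOPBACK)) {
 *			NL_SET_ERR_MSG(ivi->extack,
 *				       "loopback address on non-loopback device");
 *			return notifier_from_errno(-EINVAL);
 *		}
 *		return NOTIFY_DONE;
 *	}
 */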
1478 
1479 /* Rename ifa_labels for a device name change. Make some effort to preserve
1480  * existing alias numbering and to create unique labels if possible.
1481 */
1482 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1483 {
1484 	struct in_ifaddr *ifa;
1485 	int named = 0;
1486 
1487 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1488 		char old[IFNAMSIZ], *dot;
1489 
1490 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1491 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1492 		if (named++ == 0)
1493 			goto skip;
1494 		dot = strchr(old, ':');
1495 		if (!dot) {
1496 			sprintf(old, ":%d", named);
1497 			dot = old;
1498 		}
1499 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1500 			strcat(ifa->ifa_label, dot);
1501 		else
1502 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1503 skip:
1504 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1505 	}
1506 }
1507 
1508 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1509 					struct in_device *in_dev)
1510 
1511 {
1512 	const struct in_ifaddr *ifa;
1513 
1514 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1515 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1516 			 ifa->ifa_local, dev,
1517 			 ifa->ifa_local, NULL,
1518 			 dev->dev_addr, NULL);
1519 	}
1520 }
1521 
1522 /* Called only under RTNL semaphore */
1523 
1524 static int inetdev_event(struct notifier_block *this, unsigned long event,
1525 			 void *ptr)
1526 {
1527 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1528 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1529 
1530 	ASSERT_RTNL();
1531 
1532 	if (!in_dev) {
1533 		if (event == NETDEV_REGISTER) {
1534 			in_dev = inetdev_init(dev);
1535 			if (IS_ERR(in_dev))
1536 				return notifier_from_errno(PTR_ERR(in_dev));
1537 			if (dev->flags & IFF_LOOPBACK) {
1538 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1539 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1540 			}
1541 		} else if (event == NETDEV_CHANGEMTU) {
1542 			/* Re-enabling IP */
1543 			if (inetdev_valid_mtu(dev->mtu))
1544 				in_dev = inetdev_init(dev);
1545 		}
1546 		goto out;
1547 	}
1548 
1549 	switch (event) {
1550 	case NETDEV_REGISTER:
1551 		pr_debug("%s: bug\n", __func__);
1552 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1553 		break;
1554 	case NETDEV_UP:
1555 		if (!inetdev_valid_mtu(dev->mtu))
1556 			break;
1557 		if (dev->flags & IFF_LOOPBACK) {
1558 			struct in_ifaddr *ifa = inet_alloc_ifa();
1559 
1560 			if (ifa) {
1561 				INIT_HLIST_NODE(&ifa->hash);
1562 				ifa->ifa_local =
1563 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1564 				ifa->ifa_prefixlen = 8;
1565 				ifa->ifa_mask = inet_make_mask(8);
1566 				in_dev_hold(in_dev);
1567 				ifa->ifa_dev = in_dev;
1568 				ifa->ifa_scope = RT_SCOPE_HOST;
1569 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1570 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1571 						 INFINITY_LIFE_TIME);
1572 				ipv4_devconf_setall(in_dev);
1573 				neigh_parms_data_state_setall(in_dev->arp_parms);
1574 				inet_insert_ifa(ifa);
1575 			}
1576 		}
1577 		ip_mc_up(in_dev);
1578 		fallthrough;
1579 	case NETDEV_CHANGEADDR:
1580 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1581 			break;
1582 		fallthrough;
1583 	case NETDEV_NOTIFY_PEERS:
1584 		/* Send gratuitous ARP to notify of link change */
1585 		inetdev_send_gratuitous_arp(dev, in_dev);
1586 		break;
1587 	case NETDEV_DOWN:
1588 		ip_mc_down(in_dev);
1589 		break;
1590 	case NETDEV_PRE_TYPE_CHANGE:
1591 		ip_mc_unmap(in_dev);
1592 		break;
1593 	case NETDEV_POST_TYPE_CHANGE:
1594 		ip_mc_remap(in_dev);
1595 		break;
1596 	case NETDEV_CHANGEMTU:
1597 		if (inetdev_valid_mtu(dev->mtu))
1598 			break;
1599 		/* disable IP when MTU is not enough */
1600 		fallthrough;
1601 	case NETDEV_UNREGISTER:
1602 		inetdev_destroy(in_dev);
1603 		break;
1604 	case NETDEV_CHANGENAME:
1605 		/* Do not notify about the label change; this event is
1606 		 * not interesting to applications using netlink.
1607 		 */
1608 		inetdev_changename(dev, in_dev);
1609 
1610 		devinet_sysctl_unregister(in_dev);
1611 		devinet_sysctl_register(in_dev);
1612 		break;
1613 	}
1614 out:
1615 	return NOTIFY_DONE;
1616 }
1617 
1618 static struct notifier_block ip_netdev_notifier = {
1619 	.notifier_call = inetdev_event,
1620 };
1621 
1622 static size_t inet_nlmsg_size(void)
1623 {
1624 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1625 	       + nla_total_size(4) /* IFA_ADDRESS */
1626 	       + nla_total_size(4) /* IFA_LOCAL */
1627 	       + nla_total_size(4) /* IFA_BROADCAST */
1628 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1629 	       + nla_total_size(4)  /* IFA_FLAGS */
1630 	       + nla_total_size(1)  /* IFA_PROTO */
1631 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1632 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1633 }
1634 
1635 static inline u32 cstamp_delta(unsigned long cstamp)
1636 {
1637 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1638 }
1639 
1640 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1641 			 unsigned long tstamp, u32 preferred, u32 valid)
1642 {
1643 	struct ifa_cacheinfo ci;
1644 
1645 	ci.cstamp = cstamp_delta(cstamp);
1646 	ci.tstamp = cstamp_delta(tstamp);
1647 	ci.ifa_prefered = preferred;
1648 	ci.ifa_valid = valid;
1649 
1650 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1651 }
1652 
1653 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1654 			    struct inet_fill_args *args)
1655 {
1656 	struct ifaddrmsg *ifm;
1657 	struct nlmsghdr  *nlh;
1658 	u32 preferred, valid;
1659 
1660 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1661 			args->flags);
1662 	if (!nlh)
1663 		return -EMSGSIZE;
1664 
1665 	ifm = nlmsg_data(nlh);
1666 	ifm->ifa_family = AF_INET;
1667 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1668 	ifm->ifa_flags = ifa->ifa_flags;
1669 	ifm->ifa_scope = ifa->ifa_scope;
1670 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1671 
1672 	if (args->netnsid >= 0 &&
1673 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1674 		goto nla_put_failure;
1675 
1676 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1677 		preferred = ifa->ifa_preferred_lft;
1678 		valid = ifa->ifa_valid_lft;
1679 		if (preferred != INFINITY_LIFE_TIME) {
1680 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1681 
1682 			if (preferred > tval)
1683 				preferred -= tval;
1684 			else
1685 				preferred = 0;
1686 			if (valid != INFINITY_LIFE_TIME) {
1687 				if (valid > tval)
1688 					valid -= tval;
1689 				else
1690 					valid = 0;
1691 			}
1692 		}
1693 	} else {
1694 		preferred = INFINITY_LIFE_TIME;
1695 		valid = INFINITY_LIFE_TIME;
1696 	}
1697 	if ((ifa->ifa_address &&
1698 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1699 	    (ifa->ifa_local &&
1700 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1701 	    (ifa->ifa_broadcast &&
1702 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1703 	    (ifa->ifa_label[0] &&
1704 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1705 	    (ifa->ifa_proto &&
1706 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1707 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1708 	    (ifa->ifa_rt_priority &&
1709 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1710 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1711 			  preferred, valid))
1712 		goto nla_put_failure;
1713 
1714 	nlmsg_end(skb, nlh);
1715 	return 0;
1716 
1717 nla_put_failure:
1718 	nlmsg_cancel(skb, nlh);
1719 	return -EMSGSIZE;
1720 }
1721 
1722 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1723 				      struct inet_fill_args *fillargs,
1724 				      struct net **tgt_net, struct sock *sk,
1725 				      struct netlink_callback *cb)
1726 {
1727 	struct netlink_ext_ack *extack = cb->extack;
1728 	struct nlattr *tb[IFA_MAX+1];
1729 	struct ifaddrmsg *ifm;
1730 	int err, i;
1731 
1732 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1733 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1734 		return -EINVAL;
1735 	}
1736 
1737 	ifm = nlmsg_data(nlh);
1738 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1739 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1740 		return -EINVAL;
1741 	}
1742 
1743 	fillargs->ifindex = ifm->ifa_index;
1744 	if (fillargs->ifindex) {
1745 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1746 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1747 	}
1748 
1749 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1750 					    ifa_ipv4_policy, extack);
1751 	if (err < 0)
1752 		return err;
1753 
1754 	for (i = 0; i <= IFA_MAX; ++i) {
1755 		if (!tb[i])
1756 			continue;
1757 
1758 		if (i == IFA_TARGET_NETNSID) {
1759 			struct net *net;
1760 
1761 			fillargs->netnsid = nla_get_s32(tb[i]);
1762 
1763 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1764 			if (IS_ERR(net)) {
1765 				fillargs->netnsid = -1;
1766 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1767 				return PTR_ERR(net);
1768 			}
1769 			*tgt_net = net;
1770 		} else {
1771 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1772 			return -EINVAL;
1773 		}
1774 	}
1775 
1776 	return 0;
1777 }
1778 
1779 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1780 			    struct netlink_callback *cb, int s_ip_idx,
1781 			    struct inet_fill_args *fillargs)
1782 {
1783 	struct in_ifaddr *ifa;
1784 	int ip_idx = 0;
1785 	int err;
1786 
1787 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1788 		if (ip_idx < s_ip_idx) {
1789 			ip_idx++;
1790 			continue;
1791 		}
1792 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1793 		if (err < 0)
1794 			goto done;
1795 
1796 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1797 		ip_idx++;
1798 	}
1799 	err = 0;
1800 
1801 done:
1802 	cb->args[2] = ip_idx;
1803 
1804 	return err;
1805 }
1806 
1807 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1808 {
1809 	const struct nlmsghdr *nlh = cb->nlh;
1810 	struct inet_fill_args fillargs = {
1811 		.portid = NETLINK_CB(cb->skb).portid,
1812 		.seq = nlh->nlmsg_seq,
1813 		.event = RTM_NEWADDR,
1814 		.flags = NLM_F_MULTI,
1815 		.netnsid = -1,
1816 	};
1817 	struct net *net = sock_net(skb->sk);
1818 	struct net *tgt_net = net;
1819 	int h, s_h;
1820 	int idx, s_idx;
1821 	int s_ip_idx;
1822 	struct net_device *dev;
1823 	struct in_device *in_dev;
1824 	struct hlist_head *head;
1825 	int err = 0;
1826 
1827 	s_h = cb->args[0];
1828 	s_idx = idx = cb->args[1];
1829 	s_ip_idx = cb->args[2];
1830 
1831 	if (cb->strict_check) {
1832 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1833 						 skb->sk, cb);
1834 		if (err < 0)
1835 			goto put_tgt_net;
1836 
1837 		err = 0;
1838 		if (fillargs.ifindex) {
1839 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1840 			if (!dev) {
1841 				err = -ENODEV;
1842 				goto put_tgt_net;
1843 			}
1844 
1845 			in_dev = __in_dev_get_rtnl(dev);
1846 			if (in_dev) {
1847 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1848 						       &fillargs);
1849 			}
1850 			goto put_tgt_net;
1851 		}
1852 	}
1853 
1854 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1855 		idx = 0;
1856 		head = &tgt_net->dev_index_head[h];
1857 		rcu_read_lock();
1858 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1859 			  tgt_net->dev_base_seq;
1860 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1861 			if (idx < s_idx)
1862 				goto cont;
1863 			if (h > s_h || idx > s_idx)
1864 				s_ip_idx = 0;
1865 			in_dev = __in_dev_get_rcu(dev);
1866 			if (!in_dev)
1867 				goto cont;
1868 
1869 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1870 					       &fillargs);
1871 			if (err < 0) {
1872 				rcu_read_unlock();
1873 				goto done;
1874 			}
1875 cont:
1876 			idx++;
1877 		}
1878 		rcu_read_unlock();
1879 	}
1880 
1881 done:
1882 	cb->args[0] = h;
1883 	cb->args[1] = idx;
1884 put_tgt_net:
1885 	if (fillargs.netnsid >= 0)
1886 		put_net(tgt_net);
1887 
1888 	return skb->len ? : err;
1889 }
1890 
1891 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1892 		      u32 portid)
1893 {
1894 	struct inet_fill_args fillargs = {
1895 		.portid = portid,
1896 		.seq = nlh ? nlh->nlmsg_seq : 0,
1897 		.event = event,
1898 		.flags = 0,
1899 		.netnsid = -1,
1900 	};
1901 	struct sk_buff *skb;
1902 	int err = -ENOBUFS;
1903 	struct net *net;
1904 
1905 	net = dev_net(ifa->ifa_dev->dev);
1906 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1907 	if (!skb)
1908 		goto errout;
1909 
1910 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1911 	if (err < 0) {
1912 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1913 		WARN_ON(err == -EMSGSIZE);
1914 		kfree_skb(skb);
1915 		goto errout;
1916 	}
1917 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1918 	return;
1919 errout:
1920 	if (err < 0)
1921 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1922 }
1923 
1924 static size_t inet_get_link_af_size(const struct net_device *dev,
1925 				    u32 ext_filter_mask)
1926 {
1927 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1928 
1929 	if (!in_dev)
1930 		return 0;
1931 
1932 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1933 }
1934 
1935 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1936 			     u32 ext_filter_mask)
1937 {
1938 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1939 	struct nlattr *nla;
1940 	int i;
1941 
1942 	if (!in_dev)
1943 		return -ENODATA;
1944 
1945 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1946 	if (!nla)
1947 		return -EMSGSIZE;
1948 
1949 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1950 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1951 
1952 	return 0;
1953 }
1954 
1955 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1956 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1957 };
1958 
1959 static int inet_validate_link_af(const struct net_device *dev,
1960 				 const struct nlattr *nla,
1961 				 struct netlink_ext_ack *extack)
1962 {
1963 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1964 	int err, rem;
1965 
1966 	if (dev && !__in_dev_get_rtnl(dev))
1967 		return -EAFNOSUPPORT;
1968 
1969 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1970 					  inet_af_policy, extack);
1971 	if (err < 0)
1972 		return err;
1973 
1974 	if (tb[IFLA_INET_CONF]) {
1975 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1976 			int cfgid = nla_type(a);
1977 
1978 			if (nla_len(a) < 4)
1979 				return -EINVAL;
1980 
1981 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1982 				return -EINVAL;
1983 		}
1984 	}
1985 
1986 	return 0;
1987 }
1988 
1989 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1990 			    struct netlink_ext_ack *extack)
1991 {
1992 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1993 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1994 	int rem;
1995 
1996 	if (!in_dev)
1997 		return -EAFNOSUPPORT;
1998 
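	/* No policy here; attribute validation is done separately in
	 * inet_validate_link_af().
	 */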
1999 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2000 		return -EINVAL;
2001 
2002 	if (tb[IFLA_INET_CONF]) {
2003 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2004 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2005 	}
2006 
2007 	return 0;
2008 }
2009 
2010 static int inet_netconf_msgsize_devconf(int type)
2011 {
2012 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2013 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2014 	bool all = false;
2015 
2016 	if (type == NETCONFA_ALL)
2017 		all = true;
2018 
2019 	if (all || type == NETCONFA_FORWARDING)
2020 		size += nla_total_size(4);
2021 	if (all || type == NETCONFA_RP_FILTER)
2022 		size += nla_total_size(4);
2023 	if (all || type == NETCONFA_MC_FORWARDING)
2024 		size += nla_total_size(4);
2025 	if (all || type == NETCONFA_BC_FORWARDING)
2026 		size += nla_total_size(4);
2027 	if (all || type == NETCONFA_PROXY_NEIGH)
2028 		size += nla_total_size(4);
2029 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2030 		size += nla_total_size(4);
2031 
2032 	return size;
2033 }
2034 
2035 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2036 				     struct ipv4_devconf *devconf, u32 portid,
2037 				     u32 seq, int event, unsigned int flags,
2038 				     int type)
2039 {
2040 	struct nlmsghdr  *nlh;
2041 	struct netconfmsg *ncm;
2042 	bool all = false;
2043 
2044 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2045 			flags);
2046 	if (!nlh)
2047 		return -EMSGSIZE;
2048 
2049 	if (type == NETCONFA_ALL)
2050 		all = true;
2051 
2052 	ncm = nlmsg_data(nlh);
2053 	ncm->ncm_family = AF_INET;
2054 
2055 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2056 		goto nla_put_failure;
2057 
2058 	if (!devconf)
2059 		goto out;
2060 
2061 	if ((all || type == NETCONFA_FORWARDING) &&
2062 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2063 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2064 		goto nla_put_failure;
2065 	if ((all || type == NETCONFA_RP_FILTER) &&
2066 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2067 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2068 		goto nla_put_failure;
2069 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2070 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2071 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2072 		goto nla_put_failure;
2073 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2074 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2075 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2076 		goto nla_put_failure;
2077 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2078 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2079 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2080 		goto nla_put_failure;
2081 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2082 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2083 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2084 		goto nla_put_failure;
2085 
2086 out:
2087 	nlmsg_end(skb, nlh);
2088 	return 0;
2089 
2090 nla_put_failure:
2091 	nlmsg_cancel(skb, nlh);
2092 	return -EMSGSIZE;
2093 }
2094 
2095 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2096 				 int ifindex, struct ipv4_devconf *devconf)
2097 {
2098 	struct sk_buff *skb;
2099 	int err = -ENOBUFS;
2100 
2101 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2102 	if (!skb)
2103 		goto errout;
2104 
2105 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2106 					event, 0, type);
2107 	if (err < 0) {
2108 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2109 		WARN_ON(err == -EMSGSIZE);
2110 		kfree_skb(skb);
2111 		goto errout;
2112 	}
2113 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2114 	return;
2115 errout:
2116 	if (err < 0)
2117 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2118 }
2119 
2120 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2121 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2122 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2123 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2124 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2125 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2126 };
2127 
2128 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2129 				      const struct nlmsghdr *nlh,
2130 				      struct nlattr **tb,
2131 				      struct netlink_ext_ack *extack)
2132 {
2133 	int i, err;
2134 
2135 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2136 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2137 		return -EINVAL;
2138 	}
2139 
2140 	if (!netlink_strict_get_check(skb))
2141 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2142 					      tb, NETCONFA_MAX,
2143 					      devconf_ipv4_policy, extack);
2144 
2145 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2146 					    tb, NETCONFA_MAX,
2147 					    devconf_ipv4_policy, extack);
2148 	if (err)
2149 		return err;
2150 
2151 	for (i = 0; i <= NETCONFA_MAX; i++) {
2152 		if (!tb[i])
2153 			continue;
2154 
2155 		switch (i) {
2156 		case NETCONFA_IFINDEX:
2157 			break;
2158 		default:
2159 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2160 			return -EINVAL;
2161 		}
2162 	}
2163 
2164 	return 0;
2165 }
2166 
2167 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2168 				    struct nlmsghdr *nlh,
2169 				    struct netlink_ext_ack *extack)
2170 {
2171 	struct net *net = sock_net(in_skb->sk);
2172 	struct nlattr *tb[NETCONFA_MAX+1];
2173 	struct sk_buff *skb;
2174 	struct ipv4_devconf *devconf;
2175 	struct in_device *in_dev;
2176 	struct net_device *dev;
2177 	int ifindex;
2178 	int err;
2179 
2180 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2181 	if (err)
2182 		goto errout;
2183 
2184 	err = -EINVAL;
2185 	if (!tb[NETCONFA_IFINDEX])
2186 		goto errout;
2187 
2188 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2189 	switch (ifindex) {
2190 	case NETCONFA_IFINDEX_ALL:
2191 		devconf = net->ipv4.devconf_all;
2192 		break;
2193 	case NETCONFA_IFINDEX_DEFAULT:
2194 		devconf = net->ipv4.devconf_dflt;
2195 		break;
2196 	default:
2197 		dev = __dev_get_by_index(net, ifindex);
2198 		if (!dev)
2199 			goto errout;
2200 		in_dev = __in_dev_get_rtnl(dev);
2201 		if (!in_dev)
2202 			goto errout;
2203 		devconf = &in_dev->cnf;
2204 		break;
2205 	}
2206 
2207 	err = -ENOBUFS;
2208 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2209 	if (!skb)
2210 		goto errout;
2211 
2212 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2213 					NETLINK_CB(in_skb).portid,
2214 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2215 					NETCONFA_ALL);
2216 	if (err < 0) {
2217 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218 		WARN_ON(err == -EMSGSIZE);
2219 		kfree_skb(skb);
2220 		goto errout;
2221 	}
2222 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2223 errout:
2224 	return err;
2225 }
2226 
2227 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2228 				     struct netlink_callback *cb)
2229 {
2230 	const struct nlmsghdr *nlh = cb->nlh;
2231 	struct net *net = sock_net(skb->sk);
2232 	int h, s_h;
2233 	int idx, s_idx;
2234 	struct net_device *dev;
2235 	struct in_device *in_dev;
2236 	struct hlist_head *head;
2237 
2238 	if (cb->strict_check) {
2239 		struct netlink_ext_ack *extack = cb->extack;
2240 		struct netconfmsg *ncm;
2241 
2242 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2243 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2244 			return -EINVAL;
2245 		}
2246 
2247 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2248 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2249 			return -EINVAL;
2250 		}
2251 	}
2252 
2253 	s_h = cb->args[0];
2254 	s_idx = idx = cb->args[1];
2255 
2256 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2257 		idx = 0;
2258 		head = &net->dev_index_head[h];
2259 		rcu_read_lock();
2260 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2261 			  net->dev_base_seq;
2262 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2263 			if (idx < s_idx)
2264 				goto cont;
2265 			in_dev = __in_dev_get_rcu(dev);
2266 			if (!in_dev)
2267 				goto cont;
2268 
2269 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2270 						      &in_dev->cnf,
2271 						      NETLINK_CB(cb->skb).portid,
2272 						      nlh->nlmsg_seq,
2273 						      RTM_NEWNETCONF,
2274 						      NLM_F_MULTI,
2275 						      NETCONFA_ALL) < 0) {
2276 				rcu_read_unlock();
2277 				goto done;
2278 			}
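			/* Mark the message NLM_F_DUMP_INTR if the
			 * configuration generation changed mid-dump.
			 */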
2279 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2280 cont:
2281 			idx++;
2282 		}
2283 		rcu_read_unlock();
2284 	}
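	/* Two extra "buckets" past the device hash carry the "all" and
	 * "default" pseudo-entries, so a resumed dump skips them correctly.
	 */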
2285 	if (h == NETDEV_HASHENTRIES) {
2286 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2287 					      net->ipv4.devconf_all,
2288 					      NETLINK_CB(cb->skb).portid,
2289 					      nlh->nlmsg_seq,
2290 					      RTM_NEWNETCONF, NLM_F_MULTI,
2291 					      NETCONFA_ALL) < 0)
2292 			goto done;
2293 		else
2294 			h++;
2295 	}
2296 	if (h == NETDEV_HASHENTRIES + 1) {
2297 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2298 					      net->ipv4.devconf_dflt,
2299 					      NETLINK_CB(cb->skb).portid,
2300 					      nlh->nlmsg_seq,
2301 					      RTM_NEWNETCONF, NLM_F_MULTI,
2302 					      NETCONFA_ALL) < 0)
2303 			goto done;
2304 		else
2305 			h++;
2306 	}
2307 done:
2308 	cb->args[0] = h;
2309 	cb->args[1] = idx;
2310 
2311 	return skb->len;
2312 }
2313 
2314 #ifdef CONFIG_SYSCTL
2315 
2316 static void devinet_copy_dflt_conf(struct net *net, int i)
2317 {
2318 	struct net_device *dev;
2319 
2320 	rcu_read_lock();
2321 	for_each_netdev_rcu(net, dev) {
2322 		struct in_device *in_dev;
2323 
2324 		in_dev = __in_dev_get_rcu(dev);
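		/* Only devices that have not explicitly set this option
		 * (state bit clear) pick up the new default value.
		 */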
2325 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2326 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2327 	}
2328 	rcu_read_unlock();
2329 }
2330 
2331 /* called with RTNL locked */
2332 static void inet_forward_change(struct net *net)
2333 {
2334 	struct net_device *dev;
2335 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2336 
2337 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2338 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2339 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2340 				    NETCONFA_FORWARDING,
2341 				    NETCONFA_IFINDEX_ALL,
2342 				    net->ipv4.devconf_all);
2343 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344 				    NETCONFA_FORWARDING,
2345 				    NETCONFA_IFINDEX_DEFAULT,
2346 				    net->ipv4.devconf_dflt);
2347 
2348 	for_each_netdev(net, dev) {
2349 		struct in_device *in_dev;
2350 
2351 		if (on)
2352 			dev_disable_lro(dev);
2353 
2354 		in_dev = __in_dev_get_rtnl(dev);
2355 		if (in_dev) {
2356 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2357 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358 						    NETCONFA_FORWARDING,
2359 						    dev->ifindex, &in_dev->cnf);
2360 		}
2361 	}
2362 }
2363 
2364 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2365 {
2366 	if (cnf == net->ipv4.devconf_dflt)
2367 		return NETCONFA_IFINDEX_DEFAULT;
2368 	else if (cnf == net->ipv4.devconf_all)
2369 		return NETCONFA_IFINDEX_ALL;
2370 	else {
2371 		struct in_device *idev
2372 			= container_of(cnf, struct in_device, cnf);
2373 		return idev->dev->ifindex;
2374 	}
2375 }
2376 
2377 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2378 			     void *buffer, size_t *lenp, loff_t *ppos)
2379 {
2380 	int old_value = *(int *)ctl->data;
2381 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382 	int new_value = *(int *)ctl->data;
2383 
2384 	if (write) {
2385 		struct ipv4_devconf *cnf = ctl->extra1;
2386 		struct net *net = ctl->extra2;
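		/* ctl->data points into cnf->data[], so pointer arithmetic
		 * recovers the devconf option index.
		 */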
2387 		int i = (int *)ctl->data - cnf->data;
2388 		int ifindex;
2389 
2390 		set_bit(i, cnf->state);
2391 
2392 		if (cnf == net->ipv4.devconf_dflt)
2393 			devinet_copy_dflt_conf(net, i);
2394 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2395 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2396 			if ((new_value == 0) && (old_value != 0))
2397 				rt_cache_flush(net);
2398 
2399 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2400 		    new_value != old_value)
2401 			rt_cache_flush(net);
2402 
2403 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2404 		    new_value != old_value) {
2405 			ifindex = devinet_conf_ifindex(net, cnf);
2406 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2407 						    NETCONFA_RP_FILTER,
2408 						    ifindex, cnf);
2409 		}
2410 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2411 		    new_value != old_value) {
2412 			ifindex = devinet_conf_ifindex(net, cnf);
2413 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2414 						    NETCONFA_PROXY_NEIGH,
2415 						    ifindex, cnf);
2416 		}
2417 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2418 		    new_value != old_value) {
2419 			ifindex = devinet_conf_ifindex(net, cnf);
2420 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2421 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2422 						    ifindex, cnf);
2423 		}
2424 	}
2425 
2426 	return ret;
2427 }
2428 
2429 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2430 				  void *buffer, size_t *lenp, loff_t *ppos)
2431 {
2432 	int *valp = ctl->data;
2433 	int val = *valp;
2434 	loff_t pos = *ppos;
2435 	struct net *net = ctl->extra2;
2436 	int ret;
2437 
2438 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2439 		return -EPERM;
2440 
2441 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2442 
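	/* Propagating a forwarding change needs the RTNL lock: "all"
	 * toggles every device, a per-device change notifies just that
	 * device, and the "default" entry only needs a netconf notify.
	 */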
2443 	if (write && *valp != val) {
2444 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2445 			if (!rtnl_trylock()) {
2446 				/* Restore the original values before restarting */
2447 				*valp = val;
2448 				*ppos = pos;
2449 				return restart_syscall();
2450 			}
2451 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2452 				inet_forward_change(net);
2453 			} else {
2454 				struct ipv4_devconf *cnf = ctl->extra1;
2455 				struct in_device *idev =
2456 					container_of(cnf, struct in_device, cnf);
2457 				if (*valp)
2458 					dev_disable_lro(idev->dev);
2459 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2460 							    NETCONFA_FORWARDING,
2461 							    idev->dev->ifindex,
2462 							    cnf);
2463 			}
2464 			rtnl_unlock();
2465 			rt_cache_flush(net);
2466 		} else
2467 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2468 						    NETCONFA_FORWARDING,
2469 						    NETCONFA_IFINDEX_DEFAULT,
2470 						    net->ipv4.devconf_dflt);
2471 	}
2472 
2473 	return ret;
2474 }
2475 
2476 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2477 				void *buffer, size_t *lenp, loff_t *ppos)
2478 {
2479 	int *valp = ctl->data;
2480 	int val = *valp;
2481 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2482 	struct net *net = ctl->extra2;
2483 
2484 	if (write && *valp != val)
2485 		rt_cache_flush(net);
2486 
2487 	return ret;
2488 }
2489 
2490 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2491 	{ \
2492 		.procname	= name, \
2493 		.data		= ipv4_devconf.data + \
2494 				  IPV4_DEVCONF_ ## attr - 1, \
2495 		.maxlen		= sizeof(int), \
2496 		.mode		= mval, \
2497 		.proc_handler	= proc, \
2498 		.extra1		= &ipv4_devconf, \
2499 	}
2500 
2501 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2502 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2503 
2504 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2505 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2506 
2507 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2508 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2509 
2510 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2511 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2512 
2513 static struct devinet_sysctl_table {
2514 	struct ctl_table_header *sysctl_header;
2515 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2516 } devinet_sysctl = {
2517 	.devinet_vars = {
2518 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2519 					     devinet_sysctl_forward),
2520 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2521 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2522 
2523 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2524 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2525 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2526 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2527 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2528 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2529 					"accept_source_route"),
2530 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2531 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2532 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2533 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2534 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2535 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2536 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2537 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2538 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2539 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2540 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2541 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2542 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2543 					"arp_evict_nocarrier"),
2544 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2545 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2546 					"force_igmp_version"),
2547 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2548 					"igmpv2_unsolicited_report_interval"),
2549 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2550 					"igmpv3_unsolicited_report_interval"),
2551 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2552 					"ignore_routes_with_linkdown"),
2553 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2554 					"drop_gratuitous_arp"),
2555 
2556 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2557 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2558 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2559 					      "promote_secondaries"),
2560 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2561 					      "route_localnet"),
2562 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2563 					      "drop_unicast_in_l2_multicast"),
2564 	},
2565 };
2566 
2567 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2568 				     int ifindex, struct ipv4_devconf *p)
2569 {
2570 	int i;
2571 	struct devinet_sysctl_table *t;
2572 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2573 
2574 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2575 	if (!t)
2576 		goto out;
2577 
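	/* Point each copied ctl_table entry at this instance's devconf
	 * and netns rather than at the global template.
	 */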
2578 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2579 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2580 		t->devinet_vars[i].extra1 = p;
2581 		t->devinet_vars[i].extra2 = net;
2582 	}
2583 
2584 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2585 
2586 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2587 	if (!t->sysctl_header)
2588 		goto free;
2589 
2590 	p->sysctl = t;
2591 
2592 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2593 				    ifindex, p);
2594 	return 0;
2595 
2596 free:
2597 	kfree(t);
2598 out:
2599 	return -ENOMEM;
2600 }
2601 
2602 static void __devinet_sysctl_unregister(struct net *net,
2603 					struct ipv4_devconf *cnf, int ifindex)
2604 {
2605 	struct devinet_sysctl_table *t = cnf->sysctl;
2606 
2607 	if (t) {
2608 		cnf->sysctl = NULL;
2609 		unregister_net_sysctl_table(t->sysctl_header);
2610 		kfree(t);
2611 	}
2612 
2613 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2614 }
2615 
2616 static int devinet_sysctl_register(struct in_device *idev)
2617 {
2618 	int err;
2619 
2620 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2621 		return -EINVAL;
2622 
2623 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2624 	if (err)
2625 		return err;
2626 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2627 					idev->dev->ifindex, &idev->cnf);
2628 	if (err)
2629 		neigh_sysctl_unregister(idev->arp_parms);
2630 	return err;
2631 }
2632 
2633 static void devinet_sysctl_unregister(struct in_device *idev)
2634 {
2635 	struct net *net = dev_net(idev->dev);
2636 
2637 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2638 	neigh_sysctl_unregister(idev->arp_parms);
2639 }
2640 
2641 static struct ctl_table ctl_forward_entry[] = {
2642 	{
2643 		.procname	= "ip_forward",
2644 		.data		= &ipv4_devconf.data[
2645 					IPV4_DEVCONF_FORWARDING - 1],
2646 		.maxlen		= sizeof(int),
2647 		.mode		= 0644,
2648 		.proc_handler	= devinet_sysctl_forward,
2649 		.extra1		= &ipv4_devconf,
2650 		.extra2		= &init_net,
2651 	},
2652 	{ },
2653 };
2654 #endif
2655 
2656 static __net_init int devinet_init_net(struct net *net)
2657 {
2658 	int err;
2659 	struct ipv4_devconf *all, *dflt;
2660 #ifdef CONFIG_SYSCTL
2661 	struct ctl_table *tbl;
2662 	struct ctl_table_header *forw_hdr;
2663 #endif
2664 
2665 	err = -ENOMEM;
2666 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2667 	if (!all)
2668 		goto err_alloc_all;
2669 
2670 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2671 	if (!dflt)
2672 		goto err_alloc_dflt;
2673 
2674 #ifdef CONFIG_SYSCTL
2675 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2676 	if (!tbl)
2677 		goto err_alloc_ctl;
2678 
2679 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2680 	tbl[0].extra1 = all;
2681 	tbl[0].extra2 = net;
2682 #endif
2683 
2684 	if (!net_eq(net, &init_net)) {
2685 		switch (net_inherit_devconf()) {
2686 		case 3:
2687 			/* copy from the current netns */
2688 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2689 			       sizeof(ipv4_devconf));
2690 			memcpy(dflt,
2691 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2692 			       sizeof(ipv4_devconf_dflt));
2693 			break;
2694 		case 0:
2695 		case 1:
2696 			/* copy from init_net */
2697 			memcpy(all, init_net.ipv4.devconf_all,
2698 			       sizeof(ipv4_devconf));
2699 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2700 			       sizeof(ipv4_devconf_dflt));
2701 			break;
2702 		case 2:
2703 			/* use compiled values */
2704 			break;
2705 		}
2706 	}
2707 
2708 #ifdef CONFIG_SYSCTL
2709 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2710 	if (err < 0)
2711 		goto err_reg_all;
2712 
2713 	err = __devinet_sysctl_register(net, "default",
2714 					NETCONFA_IFINDEX_DEFAULT, dflt);
2715 	if (err < 0)
2716 		goto err_reg_dflt;
2717 
2718 	err = -ENOMEM;
2719 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2720 	if (!forw_hdr)
2721 		goto err_reg_ctl;
2722 	net->ipv4.forw_hdr = forw_hdr;
2723 #endif
2724 
2725 	net->ipv4.devconf_all = all;
2726 	net->ipv4.devconf_dflt = dflt;
2727 	return 0;
2728 
2729 #ifdef CONFIG_SYSCTL
2730 err_reg_ctl:
2731 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2732 err_reg_dflt:
2733 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2734 err_reg_all:
2735 	kfree(tbl);
2736 err_alloc_ctl:
2737 #endif
2738 	kfree(dflt);
2739 err_alloc_dflt:
2740 	kfree(all);
2741 err_alloc_all:
2742 	return err;
2743 }
2744 
2745 static __net_exit void devinet_exit_net(struct net *net)
2746 {
2747 #ifdef CONFIG_SYSCTL
2748 	struct ctl_table *tbl;
2749 
2750 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2751 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2752 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2753 				    NETCONFA_IFINDEX_DEFAULT);
2754 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2755 				    NETCONFA_IFINDEX_ALL);
2756 	kfree(tbl);
2757 #endif
2758 	kfree(net->ipv4.devconf_dflt);
2759 	kfree(net->ipv4.devconf_all);
2760 }
2761 
2762 static __net_initdata struct pernet_operations devinet_ops = {
2763 	.init = devinet_init_net,
2764 	.exit = devinet_exit_net,
2765 };
2766 
2767 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2768 	.family		  = AF_INET,
2769 	.fill_link_af	  = inet_fill_link_af,
2770 	.get_link_af_size = inet_get_link_af_size,
2771 	.validate_link_af = inet_validate_link_af,
2772 	.set_link_af	  = inet_set_link_af,
2773 };
2774 
2775 void __init devinet_init(void)
2776 {
2777 	int i;
2778 
2779 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2780 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2781 
2782 	register_pernet_subsys(&devinet_ops);
2783 	register_netdevice_notifier(&ip_netdev_notifier);
2784 
2785 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2786 
2787 	rtnl_af_register(&inet_af_ops);
2788 
2789 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2790 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2791 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2792 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2793 		      inet_netconf_dump_devconf, 0);
2794 }
2795