xref: /openbmc/linux/net/ipv4/devinet.c (revision 3557b3fd)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
103 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
104 };
105 
106 struct inet_fill_args {
107 	u32 portid;
108 	u32 seq;
109 	int event;
110 	unsigned int flags;
111 	int netnsid;
112 	int ifindex;
113 };
114 
115 #define IN4_ADDR_HSIZE_SHIFT	8
116 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
117 
118 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119 
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122 	u32 val = (__force u32) addr ^ net_hash_mix(net);
123 
124 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126 
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
130 
131 	ASSERT_RTNL();
132 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134 
135 static void inet_hash_remove(struct in_ifaddr *ifa)
136 {
137 	ASSERT_RTNL();
138 	hlist_del_init_rcu(&ifa->hash);
139 }
140 
141 /**
142  * __ip_dev_find - find the first device with a given source address.
143  * @net: the net namespace
144  * @addr: the source address
145  * @devref: if true, take a reference on the found device
146  *
147  * If a caller uses devref=false, it should be protected by RCU, or RTNL
148  */
149 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
150 {
151 	struct net_device *result = NULL;
152 	struct in_ifaddr *ifa;
153 
154 	rcu_read_lock();
155 	ifa = inet_lookup_ifaddr_rcu(net, addr);
156 	if (!ifa) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	} else {
170 		result = ifa->ifa_dev->dev;
171 	}
172 	if (result && devref)
173 		dev_hold(result);
174 	rcu_read_unlock();
175 	return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178 
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182 	u32 hash = inet_addr_hash(net, addr);
183 	struct in_ifaddr *ifa;
184 
185 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186 		if (ifa->ifa_local == addr &&
187 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
188 			return ifa;
189 
190 	return NULL;
191 }
192 
193 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
194 
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
197 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
198 			 int destroy);
199 #ifdef CONFIG_SYSCTL
200 static int devinet_sysctl_register(struct in_device *idev);
201 static void devinet_sysctl_unregister(struct in_device *idev);
202 #else
203 static int devinet_sysctl_register(struct in_device *idev)
204 {
205 	return 0;
206 }
207 static void devinet_sysctl_unregister(struct in_device *idev)
208 {
209 }
210 #endif
211 
212 /* Locks all the inet devices. */
213 
214 static struct in_ifaddr *inet_alloc_ifa(void)
215 {
216 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
217 }
218 
219 static void inet_rcu_free_ifa(struct rcu_head *head)
220 {
221 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
222 	if (ifa->ifa_dev)
223 		in_dev_put(ifa->ifa_dev);
224 	kfree(ifa);
225 }
226 
227 static void inet_free_ifa(struct in_ifaddr *ifa)
228 {
229 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
230 }
231 
232 void in_dev_finish_destroy(struct in_device *idev)
233 {
234 	struct net_device *dev = idev->dev;
235 
236 	WARN_ON(idev->ifa_list);
237 	WARN_ON(idev->mc_list);
238 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
239 #ifdef NET_REFCNT_DEBUG
240 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
241 #endif
242 	dev_put(dev);
243 	if (!idev->dead)
244 		pr_err("Freeing alive in_device %p\n", idev);
245 	else
246 		kfree(idev);
247 }
248 EXPORT_SYMBOL(in_dev_finish_destroy);
249 
250 static struct in_device *inetdev_init(struct net_device *dev)
251 {
252 	struct in_device *in_dev;
253 	int err = -ENOMEM;
254 
255 	ASSERT_RTNL();
256 
257 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
258 	if (!in_dev)
259 		goto out;
260 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
261 			sizeof(in_dev->cnf));
262 	in_dev->cnf.sysctl = NULL;
263 	in_dev->dev = dev;
264 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
265 	if (!in_dev->arp_parms)
266 		goto out_kfree;
267 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
268 		dev_disable_lro(dev);
269 	/* Reference in_dev->dev */
270 	dev_hold(dev);
271 	/* Account for reference dev->ip_ptr (below) */
272 	refcount_set(&in_dev->refcnt, 1);
273 
274 	err = devinet_sysctl_register(in_dev);
275 	if (err) {
276 		in_dev->dead = 1;
277 		in_dev_put(in_dev);
278 		in_dev = NULL;
279 		goto out;
280 	}
281 	ip_mc_init_dev(in_dev);
282 	if (dev->flags & IFF_UP)
283 		ip_mc_up(in_dev);
284 
285 	/* we can receive as soon as ip_ptr is set -- do this last */
286 	rcu_assign_pointer(dev->ip_ptr, in_dev);
287 out:
288 	return in_dev ?: ERR_PTR(err);
289 out_kfree:
290 	kfree(in_dev);
291 	in_dev = NULL;
292 	goto out;
293 }
294 
295 static void in_dev_rcu_put(struct rcu_head *head)
296 {
297 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
298 	in_dev_put(idev);
299 }
300 
301 static void inetdev_destroy(struct in_device *in_dev)
302 {
303 	struct in_ifaddr *ifa;
304 	struct net_device *dev;
305 
306 	ASSERT_RTNL();
307 
308 	dev = in_dev->dev;
309 
310 	in_dev->dead = 1;
311 
312 	ip_mc_destroy_dev(in_dev);
313 
314 	while ((ifa = in_dev->ifa_list) != NULL) {
315 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
316 		inet_free_ifa(ifa);
317 	}
318 
319 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
320 
321 	devinet_sysctl_unregister(in_dev);
322 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
323 	arp_ifdown(dev);
324 
325 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
326 }
327 
328 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
329 {
330 	rcu_read_lock();
331 	for_primary_ifa(in_dev) {
332 		if (inet_ifa_match(a, ifa)) {
333 			if (!b || inet_ifa_match(b, ifa)) {
334 				rcu_read_unlock();
335 				return 1;
336 			}
337 		}
338 	} endfor_ifa(in_dev);
339 	rcu_read_unlock();
340 	return 0;
341 }
342 
343 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
344 			 int destroy, struct nlmsghdr *nlh, u32 portid)
345 {
346 	struct in_ifaddr *promote = NULL;
347 	struct in_ifaddr *ifa, *ifa1 = *ifap;
348 	struct in_ifaddr *last_prim = in_dev->ifa_list;
349 	struct in_ifaddr *prev_prom = NULL;
350 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
351 
352 	ASSERT_RTNL();
353 
354 	if (in_dev->dead)
355 		goto no_promotions;
356 
357 	/* 1. Deleting primary ifaddr forces deletion all secondaries
358 	 * unless alias promotion is set
359 	 **/
360 
361 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
362 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
363 
364 		while ((ifa = *ifap1) != NULL) {
365 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
366 			    ifa1->ifa_scope <= ifa->ifa_scope)
367 				last_prim = ifa;
368 
369 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
370 			    ifa1->ifa_mask != ifa->ifa_mask ||
371 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
372 				ifap1 = &ifa->ifa_next;
373 				prev_prom = ifa;
374 				continue;
375 			}
376 
377 			if (!do_promote) {
378 				inet_hash_remove(ifa);
379 				*ifap1 = ifa->ifa_next;
380 
381 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
382 				blocking_notifier_call_chain(&inetaddr_chain,
383 						NETDEV_DOWN, ifa);
384 				inet_free_ifa(ifa);
385 			} else {
386 				promote = ifa;
387 				break;
388 			}
389 		}
390 	}
391 
392 	/* On promotion all secondaries from subnet are changing
393 	 * the primary IP, we must remove all their routes silently
394 	 * and later to add them back with new prefsrc. Do this
395 	 * while all addresses are on the device list.
396 	 */
397 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
398 		if (ifa1->ifa_mask == ifa->ifa_mask &&
399 		    inet_ifa_match(ifa1->ifa_address, ifa))
400 			fib_del_ifaddr(ifa, ifa1);
401 	}
402 
403 no_promotions:
404 	/* 2. Unlink it */
405 
406 	*ifap = ifa1->ifa_next;
407 	inet_hash_remove(ifa1);
408 
409 	/* 3. Announce address deletion */
410 
411 	/* Send message first, then call notifier.
412 	   At first sight, FIB update triggered by notifier
413 	   will refer to already deleted ifaddr, that could confuse
414 	   netlink listeners. It is not true: look, gated sees
415 	   that route deleted and if it still thinks that ifaddr
416 	   is valid, it will try to restore deleted routes... Grr.
417 	   So that, this order is correct.
418 	 */
419 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
420 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
421 
422 	if (promote) {
423 		struct in_ifaddr *next_sec = promote->ifa_next;
424 
425 		if (prev_prom) {
426 			prev_prom->ifa_next = promote->ifa_next;
427 			promote->ifa_next = last_prim->ifa_next;
428 			last_prim->ifa_next = promote;
429 		}
430 
431 		promote->ifa_flags &= ~IFA_F_SECONDARY;
432 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
433 		blocking_notifier_call_chain(&inetaddr_chain,
434 				NETDEV_UP, promote);
435 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
436 			if (ifa1->ifa_mask != ifa->ifa_mask ||
437 			    !inet_ifa_match(ifa1->ifa_address, ifa))
438 					continue;
439 			fib_add_ifaddr(ifa);
440 		}
441 
442 	}
443 	if (destroy)
444 		inet_free_ifa(ifa1);
445 }
446 
447 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
448 			 int destroy)
449 {
450 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
451 }
452 
453 static void check_lifetime(struct work_struct *work);
454 
455 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
456 
457 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
458 			     u32 portid, struct netlink_ext_ack *extack)
459 {
460 	struct in_device *in_dev = ifa->ifa_dev;
461 	struct in_ifaddr *ifa1, **ifap, **last_primary;
462 	struct in_validator_info ivi;
463 	int ret;
464 
465 	ASSERT_RTNL();
466 
467 	if (!ifa->ifa_local) {
468 		inet_free_ifa(ifa);
469 		return 0;
470 	}
471 
472 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
473 	last_primary = &in_dev->ifa_list;
474 
475 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
476 	     ifap = &ifa1->ifa_next) {
477 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
478 		    ifa->ifa_scope <= ifa1->ifa_scope)
479 			last_primary = &ifa1->ifa_next;
480 		if (ifa1->ifa_mask == ifa->ifa_mask &&
481 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
482 			if (ifa1->ifa_local == ifa->ifa_local) {
483 				inet_free_ifa(ifa);
484 				return -EEXIST;
485 			}
486 			if (ifa1->ifa_scope != ifa->ifa_scope) {
487 				inet_free_ifa(ifa);
488 				return -EINVAL;
489 			}
490 			ifa->ifa_flags |= IFA_F_SECONDARY;
491 		}
492 	}
493 
494 	/* Allow any devices that wish to register ifaddr validtors to weigh
495 	 * in now, before changes are committed.  The rntl lock is serializing
496 	 * access here, so the state should not change between a validator call
497 	 * and a final notify on commit.  This isn't invoked on promotion under
498 	 * the assumption that validators are checking the address itself, and
499 	 * not the flags.
500 	 */
501 	ivi.ivi_addr = ifa->ifa_address;
502 	ivi.ivi_dev = ifa->ifa_dev;
503 	ivi.extack = extack;
504 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
505 					   NETDEV_UP, &ivi);
506 	ret = notifier_to_errno(ret);
507 	if (ret) {
508 		inet_free_ifa(ifa);
509 		return ret;
510 	}
511 
512 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
513 		prandom_seed((__force u32) ifa->ifa_local);
514 		ifap = last_primary;
515 	}
516 
517 	ifa->ifa_next = *ifap;
518 	*ifap = ifa;
519 
520 	inet_hash_insert(dev_net(in_dev->dev), ifa);
521 
522 	cancel_delayed_work(&check_lifetime_work);
523 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
524 
525 	/* Send message first, then call notifier.
526 	   Notifier will trigger FIB update, so that
527 	   listeners of netlink will know about new ifaddr */
528 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
529 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
530 
531 	return 0;
532 }
533 
534 static int inet_insert_ifa(struct in_ifaddr *ifa)
535 {
536 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
537 }
538 
539 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
540 {
541 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
542 
543 	ASSERT_RTNL();
544 
545 	if (!in_dev) {
546 		inet_free_ifa(ifa);
547 		return -ENOBUFS;
548 	}
549 	ipv4_devconf_setall(in_dev);
550 	neigh_parms_data_state_setall(in_dev->arp_parms);
551 	if (ifa->ifa_dev != in_dev) {
552 		WARN_ON(ifa->ifa_dev);
553 		in_dev_hold(in_dev);
554 		ifa->ifa_dev = in_dev;
555 	}
556 	if (ipv4_is_loopback(ifa->ifa_local))
557 		ifa->ifa_scope = RT_SCOPE_HOST;
558 	return inet_insert_ifa(ifa);
559 }
560 
561 /* Caller must hold RCU or RTNL :
562  * We dont take a reference on found in_device
563  */
564 struct in_device *inetdev_by_index(struct net *net, int ifindex)
565 {
566 	struct net_device *dev;
567 	struct in_device *in_dev = NULL;
568 
569 	rcu_read_lock();
570 	dev = dev_get_by_index_rcu(net, ifindex);
571 	if (dev)
572 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
573 	rcu_read_unlock();
574 	return in_dev;
575 }
576 EXPORT_SYMBOL(inetdev_by_index);
577 
578 /* Called only from RTNL semaphored context. No locks. */
579 
580 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
581 				    __be32 mask)
582 {
583 	ASSERT_RTNL();
584 
585 	for_primary_ifa(in_dev) {
586 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
587 			return ifa;
588 	} endfor_ifa(in_dev);
589 	return NULL;
590 }
591 
592 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
593 {
594 	struct ip_mreqn mreq = {
595 		.imr_multiaddr.s_addr = ifa->ifa_address,
596 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
597 	};
598 	int ret;
599 
600 	ASSERT_RTNL();
601 
602 	lock_sock(sk);
603 	if (join)
604 		ret = ip_mc_join_group(sk, &mreq);
605 	else
606 		ret = ip_mc_leave_group(sk, &mreq);
607 	release_sock(sk);
608 
609 	return ret;
610 }
611 
612 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
613 			    struct netlink_ext_ack *extack)
614 {
615 	struct net *net = sock_net(skb->sk);
616 	struct nlattr *tb[IFA_MAX+1];
617 	struct in_device *in_dev;
618 	struct ifaddrmsg *ifm;
619 	struct in_ifaddr *ifa, **ifap;
620 	int err = -EINVAL;
621 
622 	ASSERT_RTNL();
623 
624 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
625 			  extack);
626 	if (err < 0)
627 		goto errout;
628 
629 	ifm = nlmsg_data(nlh);
630 	in_dev = inetdev_by_index(net, ifm->ifa_index);
631 	if (!in_dev) {
632 		err = -ENODEV;
633 		goto errout;
634 	}
635 
636 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
637 	     ifap = &ifa->ifa_next) {
638 		if (tb[IFA_LOCAL] &&
639 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
640 			continue;
641 
642 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
643 			continue;
644 
645 		if (tb[IFA_ADDRESS] &&
646 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
647 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
648 			continue;
649 
650 		if (ipv4_is_multicast(ifa->ifa_address))
651 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
652 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
653 		return 0;
654 	}
655 
656 	err = -EADDRNOTAVAIL;
657 errout:
658 	return err;
659 }
660 
661 #define INFINITY_LIFE_TIME	0xFFFFFFFF
662 
663 static void check_lifetime(struct work_struct *work)
664 {
665 	unsigned long now, next, next_sec, next_sched;
666 	struct in_ifaddr *ifa;
667 	struct hlist_node *n;
668 	int i;
669 
670 	now = jiffies;
671 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
672 
673 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
674 		bool change_needed = false;
675 
676 		rcu_read_lock();
677 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
678 			unsigned long age;
679 
680 			if (ifa->ifa_flags & IFA_F_PERMANENT)
681 				continue;
682 
683 			/* We try to batch several events at once. */
684 			age = (now - ifa->ifa_tstamp +
685 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
686 
687 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
688 			    age >= ifa->ifa_valid_lft) {
689 				change_needed = true;
690 			} else if (ifa->ifa_preferred_lft ==
691 				   INFINITY_LIFE_TIME) {
692 				continue;
693 			} else if (age >= ifa->ifa_preferred_lft) {
694 				if (time_before(ifa->ifa_tstamp +
695 						ifa->ifa_valid_lft * HZ, next))
696 					next = ifa->ifa_tstamp +
697 					       ifa->ifa_valid_lft * HZ;
698 
699 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
700 					change_needed = true;
701 			} else if (time_before(ifa->ifa_tstamp +
702 					       ifa->ifa_preferred_lft * HZ,
703 					       next)) {
704 				next = ifa->ifa_tstamp +
705 				       ifa->ifa_preferred_lft * HZ;
706 			}
707 		}
708 		rcu_read_unlock();
709 		if (!change_needed)
710 			continue;
711 		rtnl_lock();
712 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
713 			unsigned long age;
714 
715 			if (ifa->ifa_flags & IFA_F_PERMANENT)
716 				continue;
717 
718 			/* We try to batch several events at once. */
719 			age = (now - ifa->ifa_tstamp +
720 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721 
722 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723 			    age >= ifa->ifa_valid_lft) {
724 				struct in_ifaddr **ifap;
725 
726 				for (ifap = &ifa->ifa_dev->ifa_list;
727 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
728 					if (*ifap == ifa) {
729 						inet_del_ifa(ifa->ifa_dev,
730 							     ifap, 1);
731 						break;
732 					}
733 				}
734 			} else if (ifa->ifa_preferred_lft !=
735 				   INFINITY_LIFE_TIME &&
736 				   age >= ifa->ifa_preferred_lft &&
737 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
738 				ifa->ifa_flags |= IFA_F_DEPRECATED;
739 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
740 			}
741 		}
742 		rtnl_unlock();
743 	}
744 
745 	next_sec = round_jiffies_up(next);
746 	next_sched = next;
747 
748 	/* If rounded timeout is accurate enough, accept it. */
749 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
750 		next_sched = next_sec;
751 
752 	now = jiffies;
753 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
754 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
755 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
756 
757 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
758 			next_sched - now);
759 }
760 
761 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
762 			     __u32 prefered_lft)
763 {
764 	unsigned long timeout;
765 
766 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
767 
768 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
769 	if (addrconf_finite_timeout(timeout))
770 		ifa->ifa_valid_lft = timeout;
771 	else
772 		ifa->ifa_flags |= IFA_F_PERMANENT;
773 
774 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
775 	if (addrconf_finite_timeout(timeout)) {
776 		if (timeout == 0)
777 			ifa->ifa_flags |= IFA_F_DEPRECATED;
778 		ifa->ifa_preferred_lft = timeout;
779 	}
780 	ifa->ifa_tstamp = jiffies;
781 	if (!ifa->ifa_cstamp)
782 		ifa->ifa_cstamp = ifa->ifa_tstamp;
783 }
784 
785 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
786 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
787 				       struct netlink_ext_ack *extack)
788 {
789 	struct nlattr *tb[IFA_MAX+1];
790 	struct in_ifaddr *ifa;
791 	struct ifaddrmsg *ifm;
792 	struct net_device *dev;
793 	struct in_device *in_dev;
794 	int err;
795 
796 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
797 			  extack);
798 	if (err < 0)
799 		goto errout;
800 
801 	ifm = nlmsg_data(nlh);
802 	err = -EINVAL;
803 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
804 		goto errout;
805 
806 	dev = __dev_get_by_index(net, ifm->ifa_index);
807 	err = -ENODEV;
808 	if (!dev)
809 		goto errout;
810 
811 	in_dev = __in_dev_get_rtnl(dev);
812 	err = -ENOBUFS;
813 	if (!in_dev)
814 		goto errout;
815 
816 	ifa = inet_alloc_ifa();
817 	if (!ifa)
818 		/*
819 		 * A potential indev allocation can be left alive, it stays
820 		 * assigned to its device and is destroy with it.
821 		 */
822 		goto errout;
823 
824 	ipv4_devconf_setall(in_dev);
825 	neigh_parms_data_state_setall(in_dev->arp_parms);
826 	in_dev_hold(in_dev);
827 
828 	if (!tb[IFA_ADDRESS])
829 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
830 
831 	INIT_HLIST_NODE(&ifa->hash);
832 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
833 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
834 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
835 					 ifm->ifa_flags;
836 	ifa->ifa_scope = ifm->ifa_scope;
837 	ifa->ifa_dev = in_dev;
838 
839 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
840 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
841 
842 	if (tb[IFA_BROADCAST])
843 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
844 
845 	if (tb[IFA_LABEL])
846 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
847 	else
848 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
849 
850 	if (tb[IFA_RT_PRIORITY])
851 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
852 
853 	if (tb[IFA_CACHEINFO]) {
854 		struct ifa_cacheinfo *ci;
855 
856 		ci = nla_data(tb[IFA_CACHEINFO]);
857 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
858 			err = -EINVAL;
859 			goto errout_free;
860 		}
861 		*pvalid_lft = ci->ifa_valid;
862 		*pprefered_lft = ci->ifa_prefered;
863 	}
864 
865 	return ifa;
866 
867 errout_free:
868 	inet_free_ifa(ifa);
869 errout:
870 	return ERR_PTR(err);
871 }
872 
873 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
874 {
875 	struct in_device *in_dev = ifa->ifa_dev;
876 	struct in_ifaddr *ifa1, **ifap;
877 
878 	if (!ifa->ifa_local)
879 		return NULL;
880 
881 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
882 	     ifap = &ifa1->ifa_next) {
883 		if (ifa1->ifa_mask == ifa->ifa_mask &&
884 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
885 		    ifa1->ifa_local == ifa->ifa_local)
886 			return ifa1;
887 	}
888 	return NULL;
889 }
890 
891 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
892 			    struct netlink_ext_ack *extack)
893 {
894 	struct net *net = sock_net(skb->sk);
895 	struct in_ifaddr *ifa;
896 	struct in_ifaddr *ifa_existing;
897 	__u32 valid_lft = INFINITY_LIFE_TIME;
898 	__u32 prefered_lft = INFINITY_LIFE_TIME;
899 
900 	ASSERT_RTNL();
901 
902 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
903 	if (IS_ERR(ifa))
904 		return PTR_ERR(ifa);
905 
906 	ifa_existing = find_matching_ifa(ifa);
907 	if (!ifa_existing) {
908 		/* It would be best to check for !NLM_F_CREATE here but
909 		 * userspace already relies on not having to provide this.
910 		 */
911 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
912 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
913 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
914 					       true, ifa);
915 
916 			if (ret < 0) {
917 				inet_free_ifa(ifa);
918 				return ret;
919 			}
920 		}
921 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
922 					 extack);
923 	} else {
924 		u32 new_metric = ifa->ifa_rt_priority;
925 
926 		inet_free_ifa(ifa);
927 
928 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
929 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
930 			return -EEXIST;
931 		ifa = ifa_existing;
932 
933 		if (ifa->ifa_rt_priority != new_metric) {
934 			fib_modify_prefix_metric(ifa, new_metric);
935 			ifa->ifa_rt_priority = new_metric;
936 		}
937 
938 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
939 		cancel_delayed_work(&check_lifetime_work);
940 		queue_delayed_work(system_power_efficient_wq,
941 				&check_lifetime_work, 0);
942 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
943 	}
944 	return 0;
945 }
946 
947 /*
948  *	Determine a default network mask, based on the IP address.
949  */
950 
951 static int inet_abc_len(__be32 addr)
952 {
953 	int rc = -1;	/* Something else, probably a multicast. */
954 
955 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
956 		rc = 0;
957 	else {
958 		__u32 haddr = ntohl(addr);
959 		if (IN_CLASSA(haddr))
960 			rc = 8;
961 		else if (IN_CLASSB(haddr))
962 			rc = 16;
963 		else if (IN_CLASSC(haddr))
964 			rc = 24;
965 		else if (IN_CLASSE(haddr))
966 			rc = 32;
967 	}
968 
969 	return rc;
970 }
971 
972 
973 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
974 {
975 	struct sockaddr_in sin_orig;
976 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
977 	struct in_device *in_dev;
978 	struct in_ifaddr **ifap = NULL;
979 	struct in_ifaddr *ifa = NULL;
980 	struct net_device *dev;
981 	char *colon;
982 	int ret = -EFAULT;
983 	int tryaddrmatch = 0;
984 
985 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
986 
987 	/* save original address for comparison */
988 	memcpy(&sin_orig, sin, sizeof(*sin));
989 
990 	colon = strchr(ifr->ifr_name, ':');
991 	if (colon)
992 		*colon = 0;
993 
994 	dev_load(net, ifr->ifr_name);
995 
996 	switch (cmd) {
997 	case SIOCGIFADDR:	/* Get interface address */
998 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
999 	case SIOCGIFDSTADDR:	/* Get the destination address */
1000 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1001 		/* Note that these ioctls will not sleep,
1002 		   so that we do not impose a lock.
1003 		   One day we will be forced to put shlock here (I mean SMP)
1004 		 */
1005 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1006 		memset(sin, 0, sizeof(*sin));
1007 		sin->sin_family = AF_INET;
1008 		break;
1009 
1010 	case SIOCSIFFLAGS:
1011 		ret = -EPERM;
1012 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1013 			goto out;
1014 		break;
1015 	case SIOCSIFADDR:	/* Set interface address (and family) */
1016 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1017 	case SIOCSIFDSTADDR:	/* Set the destination address */
1018 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1019 		ret = -EPERM;
1020 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1021 			goto out;
1022 		ret = -EINVAL;
1023 		if (sin->sin_family != AF_INET)
1024 			goto out;
1025 		break;
1026 	default:
1027 		ret = -EINVAL;
1028 		goto out;
1029 	}
1030 
1031 	rtnl_lock();
1032 
1033 	ret = -ENODEV;
1034 	dev = __dev_get_by_name(net, ifr->ifr_name);
1035 	if (!dev)
1036 		goto done;
1037 
1038 	if (colon)
1039 		*colon = ':';
1040 
1041 	in_dev = __in_dev_get_rtnl(dev);
1042 	if (in_dev) {
1043 		if (tryaddrmatch) {
1044 			/* Matthias Andree */
1045 			/* compare label and address (4.4BSD style) */
1046 			/* note: we only do this for a limited set of ioctls
1047 			   and only if the original address family was AF_INET.
1048 			   This is checked above. */
1049 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1050 			     ifap = &ifa->ifa_next) {
1051 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1052 				    sin_orig.sin_addr.s_addr ==
1053 							ifa->ifa_local) {
1054 					break; /* found */
1055 				}
1056 			}
1057 		}
1058 		/* we didn't get a match, maybe the application is
1059 		   4.3BSD-style and passed in junk so we fall back to
1060 		   comparing just the label */
1061 		if (!ifa) {
1062 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1063 			     ifap = &ifa->ifa_next)
1064 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1065 					break;
1066 		}
1067 	}
1068 
1069 	ret = -EADDRNOTAVAIL;
1070 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1071 		goto done;
1072 
1073 	switch (cmd) {
1074 	case SIOCGIFADDR:	/* Get interface address */
1075 		ret = 0;
1076 		sin->sin_addr.s_addr = ifa->ifa_local;
1077 		break;
1078 
1079 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1080 		ret = 0;
1081 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1082 		break;
1083 
1084 	case SIOCGIFDSTADDR:	/* Get the destination address */
1085 		ret = 0;
1086 		sin->sin_addr.s_addr = ifa->ifa_address;
1087 		break;
1088 
1089 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1090 		ret = 0;
1091 		sin->sin_addr.s_addr = ifa->ifa_mask;
1092 		break;
1093 
1094 	case SIOCSIFFLAGS:
1095 		if (colon) {
1096 			ret = -EADDRNOTAVAIL;
1097 			if (!ifa)
1098 				break;
1099 			ret = 0;
1100 			if (!(ifr->ifr_flags & IFF_UP))
1101 				inet_del_ifa(in_dev, ifap, 1);
1102 			break;
1103 		}
1104 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1105 		break;
1106 
1107 	case SIOCSIFADDR:	/* Set interface address (and family) */
1108 		ret = -EINVAL;
1109 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110 			break;
1111 
1112 		if (!ifa) {
1113 			ret = -ENOBUFS;
1114 			ifa = inet_alloc_ifa();
1115 			if (!ifa)
1116 				break;
1117 			INIT_HLIST_NODE(&ifa->hash);
1118 			if (colon)
1119 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1120 			else
1121 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1122 		} else {
1123 			ret = 0;
1124 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1125 				break;
1126 			inet_del_ifa(in_dev, ifap, 0);
1127 			ifa->ifa_broadcast = 0;
1128 			ifa->ifa_scope = 0;
1129 		}
1130 
1131 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1132 
1133 		if (!(dev->flags & IFF_POINTOPOINT)) {
1134 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1135 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1136 			if ((dev->flags & IFF_BROADCAST) &&
1137 			    ifa->ifa_prefixlen < 31)
1138 				ifa->ifa_broadcast = ifa->ifa_address |
1139 						     ~ifa->ifa_mask;
1140 		} else {
1141 			ifa->ifa_prefixlen = 32;
1142 			ifa->ifa_mask = inet_make_mask(32);
1143 		}
1144 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1145 		ret = inet_set_ifa(dev, ifa);
1146 		break;
1147 
1148 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1149 		ret = 0;
1150 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1151 			inet_del_ifa(in_dev, ifap, 0);
1152 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1153 			inet_insert_ifa(ifa);
1154 		}
1155 		break;
1156 
1157 	case SIOCSIFDSTADDR:	/* Set the destination address */
1158 		ret = 0;
1159 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1160 			break;
1161 		ret = -EINVAL;
1162 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1163 			break;
1164 		ret = 0;
1165 		inet_del_ifa(in_dev, ifap, 0);
1166 		ifa->ifa_address = sin->sin_addr.s_addr;
1167 		inet_insert_ifa(ifa);
1168 		break;
1169 
1170 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1171 
1172 		/*
1173 		 *	The mask we set must be legal.
1174 		 */
1175 		ret = -EINVAL;
1176 		if (bad_mask(sin->sin_addr.s_addr, 0))
1177 			break;
1178 		ret = 0;
1179 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1180 			__be32 old_mask = ifa->ifa_mask;
1181 			inet_del_ifa(in_dev, ifap, 0);
1182 			ifa->ifa_mask = sin->sin_addr.s_addr;
1183 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1184 
1185 			/* See if current broadcast address matches
1186 			 * with current netmask, then recalculate
1187 			 * the broadcast address. Otherwise it's a
1188 			 * funny address, so don't touch it since
1189 			 * the user seems to know what (s)he's doing...
1190 			 */
1191 			if ((dev->flags & IFF_BROADCAST) &&
1192 			    (ifa->ifa_prefixlen < 31) &&
1193 			    (ifa->ifa_broadcast ==
1194 			     (ifa->ifa_local|~old_mask))) {
1195 				ifa->ifa_broadcast = (ifa->ifa_local |
1196 						      ~sin->sin_addr.s_addr);
1197 			}
1198 			inet_insert_ifa(ifa);
1199 		}
1200 		break;
1201 	}
1202 done:
1203 	rtnl_unlock();
1204 out:
1205 	return ret;
1206 }
1207 
1208 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1209 {
1210 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1211 	struct in_ifaddr *ifa;
1212 	struct ifreq ifr;
1213 	int done = 0;
1214 
1215 	if (WARN_ON(size > sizeof(struct ifreq)))
1216 		goto out;
1217 
1218 	if (!in_dev)
1219 		goto out;
1220 
1221 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1222 		if (!buf) {
1223 			done += size;
1224 			continue;
1225 		}
1226 		if (len < size)
1227 			break;
1228 		memset(&ifr, 0, sizeof(struct ifreq));
1229 		strcpy(ifr.ifr_name, ifa->ifa_label);
1230 
1231 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1232 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1233 								ifa->ifa_local;
1234 
1235 		if (copy_to_user(buf + done, &ifr, size)) {
1236 			done = -EFAULT;
1237 			break;
1238 		}
1239 		len  -= size;
1240 		done += size;
1241 	}
1242 out:
1243 	return done;
1244 }
1245 
1246 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1247 				 int scope)
1248 {
1249 	for_primary_ifa(in_dev) {
1250 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1251 		    ifa->ifa_scope <= scope)
1252 			return ifa->ifa_local;
1253 	} endfor_ifa(in_dev);
1254 
1255 	return 0;
1256 }
1257 
1258 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1259 {
1260 	__be32 addr = 0;
1261 	struct in_device *in_dev;
1262 	struct net *net = dev_net(dev);
1263 	int master_idx;
1264 
1265 	rcu_read_lock();
1266 	in_dev = __in_dev_get_rcu(dev);
1267 	if (!in_dev)
1268 		goto no_in_dev;
1269 
1270 	for_primary_ifa(in_dev) {
1271 		if (ifa->ifa_scope > scope)
1272 			continue;
1273 		if (!dst || inet_ifa_match(dst, ifa)) {
1274 			addr = ifa->ifa_local;
1275 			break;
1276 		}
1277 		if (!addr)
1278 			addr = ifa->ifa_local;
1279 	} endfor_ifa(in_dev);
1280 
1281 	if (addr)
1282 		goto out_unlock;
1283 no_in_dev:
1284 	master_idx = l3mdev_master_ifindex_rcu(dev);
1285 
1286 	/* For VRFs, the VRF device takes the place of the loopback device,
1287 	 * with addresses on it being preferred.  Note in such cases the
1288 	 * loopback device will be among the devices that fail the master_idx
1289 	 * equality check in the loop below.
1290 	 */
1291 	if (master_idx &&
1292 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1293 	    (in_dev = __in_dev_get_rcu(dev))) {
1294 		addr = in_dev_select_addr(in_dev, scope);
1295 		if (addr)
1296 			goto out_unlock;
1297 	}
1298 
1299 	/* Not loopback addresses on loopback should be preferred
1300 	   in this case. It is important that lo is the first interface
1301 	   in dev_base list.
1302 	 */
1303 	for_each_netdev_rcu(net, dev) {
1304 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1305 			continue;
1306 
1307 		in_dev = __in_dev_get_rcu(dev);
1308 		if (!in_dev)
1309 			continue;
1310 
1311 		addr = in_dev_select_addr(in_dev, scope);
1312 		if (addr)
1313 			goto out_unlock;
1314 	}
1315 out_unlock:
1316 	rcu_read_unlock();
1317 	return addr;
1318 }
1319 EXPORT_SYMBOL(inet_select_addr);
1320 
1321 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1322 			      __be32 local, int scope)
1323 {
1324 	int same = 0;
1325 	__be32 addr = 0;
1326 
1327 	for_ifa(in_dev) {
1328 		if (!addr &&
1329 		    (local == ifa->ifa_local || !local) &&
1330 		    ifa->ifa_scope <= scope) {
1331 			addr = ifa->ifa_local;
1332 			if (same)
1333 				break;
1334 		}
1335 		if (!same) {
1336 			same = (!local || inet_ifa_match(local, ifa)) &&
1337 				(!dst || inet_ifa_match(dst, ifa));
1338 			if (same && addr) {
1339 				if (local || !dst)
1340 					break;
1341 				/* Is the selected addr into dst subnet? */
1342 				if (inet_ifa_match(addr, ifa))
1343 					break;
1344 				/* No, then can we use new local src? */
1345 				if (ifa->ifa_scope <= scope) {
1346 					addr = ifa->ifa_local;
1347 					break;
1348 				}
1349 				/* search for large dst subnet for addr */
1350 				same = 0;
1351 			}
1352 		}
1353 	} endfor_ifa(in_dev);
1354 
1355 	return same ? addr : 0;
1356 }
1357 
1358 /*
1359  * Confirm that local IP address exists using wildcards:
1360  * - net: netns to check, cannot be NULL
1361  * - in_dev: only on this interface, NULL=any interface
1362  * - dst: only in the same subnet as dst, 0=any dst
1363  * - local: address, 0=autoselect the local address
1364  * - scope: maximum allowed scope value for the local address
1365  */
1366 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1367 			 __be32 dst, __be32 local, int scope)
1368 {
1369 	__be32 addr = 0;
1370 	struct net_device *dev;
1371 
1372 	if (in_dev)
1373 		return confirm_addr_indev(in_dev, dst, local, scope);
1374 
1375 	rcu_read_lock();
1376 	for_each_netdev_rcu(net, dev) {
1377 		in_dev = __in_dev_get_rcu(dev);
1378 		if (in_dev) {
1379 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1380 			if (addr)
1381 				break;
1382 		}
1383 	}
1384 	rcu_read_unlock();
1385 
1386 	return addr;
1387 }
1388 EXPORT_SYMBOL(inet_confirm_addr);
1389 
1390 /*
1391  *	Device notifier
1392  */
1393 
1394 int register_inetaddr_notifier(struct notifier_block *nb)
1395 {
1396 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_notifier);
1399 
1400 int unregister_inetaddr_notifier(struct notifier_block *nb)
1401 {
1402 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1403 }
1404 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1405 
1406 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1407 {
1408 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1409 }
1410 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1411 
1412 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1413 {
1414 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1415 	    nb);
1416 }
1417 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1418 
1419 /* Rename ifa_labels for a device name change. Make some effort to preserve
1420  * existing alias numbering and to create unique labels if possible.
1421 */
1422 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1423 {
1424 	struct in_ifaddr *ifa;
1425 	int named = 0;
1426 
1427 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1428 		char old[IFNAMSIZ], *dot;
1429 
1430 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1431 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1432 		if (named++ == 0)
1433 			goto skip;
1434 		dot = strchr(old, ':');
1435 		if (!dot) {
1436 			sprintf(old, ":%d", named);
1437 			dot = old;
1438 		}
1439 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1440 			strcat(ifa->ifa_label, dot);
1441 		else
1442 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1443 skip:
1444 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1445 	}
1446 }
1447 
1448 static bool inetdev_valid_mtu(unsigned int mtu)
1449 {
1450 	return mtu >= IPV4_MIN_MTU;
1451 }
1452 
1453 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1454 					struct in_device *in_dev)
1455 
1456 {
1457 	struct in_ifaddr *ifa;
1458 
1459 	for (ifa = in_dev->ifa_list; ifa;
1460 	     ifa = ifa->ifa_next) {
1461 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1462 			 ifa->ifa_local, dev,
1463 			 ifa->ifa_local, NULL,
1464 			 dev->dev_addr, NULL);
1465 	}
1466 }
1467 
1468 /* Called only under RTNL semaphore */
1469 
1470 static int inetdev_event(struct notifier_block *this, unsigned long event,
1471 			 void *ptr)
1472 {
1473 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1474 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1475 
1476 	ASSERT_RTNL();
1477 
1478 	if (!in_dev) {
1479 		if (event == NETDEV_REGISTER) {
1480 			in_dev = inetdev_init(dev);
1481 			if (IS_ERR(in_dev))
1482 				return notifier_from_errno(PTR_ERR(in_dev));
1483 			if (dev->flags & IFF_LOOPBACK) {
1484 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1485 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1486 			}
1487 		} else if (event == NETDEV_CHANGEMTU) {
1488 			/* Re-enabling IP */
1489 			if (inetdev_valid_mtu(dev->mtu))
1490 				in_dev = inetdev_init(dev);
1491 		}
1492 		goto out;
1493 	}
1494 
1495 	switch (event) {
1496 	case NETDEV_REGISTER:
1497 		pr_debug("%s: bug\n", __func__);
1498 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1499 		break;
1500 	case NETDEV_UP:
1501 		if (!inetdev_valid_mtu(dev->mtu))
1502 			break;
1503 		if (dev->flags & IFF_LOOPBACK) {
1504 			struct in_ifaddr *ifa = inet_alloc_ifa();
1505 
1506 			if (ifa) {
1507 				INIT_HLIST_NODE(&ifa->hash);
1508 				ifa->ifa_local =
1509 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1510 				ifa->ifa_prefixlen = 8;
1511 				ifa->ifa_mask = inet_make_mask(8);
1512 				in_dev_hold(in_dev);
1513 				ifa->ifa_dev = in_dev;
1514 				ifa->ifa_scope = RT_SCOPE_HOST;
1515 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1516 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1517 						 INFINITY_LIFE_TIME);
1518 				ipv4_devconf_setall(in_dev);
1519 				neigh_parms_data_state_setall(in_dev->arp_parms);
1520 				inet_insert_ifa(ifa);
1521 			}
1522 		}
1523 		ip_mc_up(in_dev);
1524 		/* fall through */
1525 	case NETDEV_CHANGEADDR:
1526 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1527 			break;
1528 		/* fall through */
1529 	case NETDEV_NOTIFY_PEERS:
1530 		/* Send gratuitous ARP to notify of link change */
1531 		inetdev_send_gratuitous_arp(dev, in_dev);
1532 		break;
1533 	case NETDEV_DOWN:
1534 		ip_mc_down(in_dev);
1535 		break;
1536 	case NETDEV_PRE_TYPE_CHANGE:
1537 		ip_mc_unmap(in_dev);
1538 		break;
1539 	case NETDEV_POST_TYPE_CHANGE:
1540 		ip_mc_remap(in_dev);
1541 		break;
1542 	case NETDEV_CHANGEMTU:
1543 		if (inetdev_valid_mtu(dev->mtu))
1544 			break;
1545 		/* disable IP when MTU is not enough */
1546 		/* fall through */
1547 	case NETDEV_UNREGISTER:
1548 		inetdev_destroy(in_dev);
1549 		break;
1550 	case NETDEV_CHANGENAME:
1551 		/* Do not notify about label change, this event is
1552 		 * not interesting to applications using netlink.
1553 		 */
1554 		inetdev_changename(dev, in_dev);
1555 
1556 		devinet_sysctl_unregister(in_dev);
1557 		devinet_sysctl_register(in_dev);
1558 		break;
1559 	}
1560 out:
1561 	return NOTIFY_DONE;
1562 }
1563 
1564 static struct notifier_block ip_netdev_notifier = {
1565 	.notifier_call = inetdev_event,
1566 };
1567 
1568 static size_t inet_nlmsg_size(void)
1569 {
1570 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1571 	       + nla_total_size(4) /* IFA_ADDRESS */
1572 	       + nla_total_size(4) /* IFA_LOCAL */
1573 	       + nla_total_size(4) /* IFA_BROADCAST */
1574 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1575 	       + nla_total_size(4)  /* IFA_FLAGS */
1576 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1577 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1578 }
1579 
1580 static inline u32 cstamp_delta(unsigned long cstamp)
1581 {
1582 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1583 }
1584 
1585 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1586 			 unsigned long tstamp, u32 preferred, u32 valid)
1587 {
1588 	struct ifa_cacheinfo ci;
1589 
1590 	ci.cstamp = cstamp_delta(cstamp);
1591 	ci.tstamp = cstamp_delta(tstamp);
1592 	ci.ifa_prefered = preferred;
1593 	ci.ifa_valid = valid;
1594 
1595 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1596 }
1597 
1598 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1599 			    struct inet_fill_args *args)
1600 {
1601 	struct ifaddrmsg *ifm;
1602 	struct nlmsghdr  *nlh;
1603 	u32 preferred, valid;
1604 
1605 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1606 			args->flags);
1607 	if (!nlh)
1608 		return -EMSGSIZE;
1609 
1610 	ifm = nlmsg_data(nlh);
1611 	ifm->ifa_family = AF_INET;
1612 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1613 	ifm->ifa_flags = ifa->ifa_flags;
1614 	ifm->ifa_scope = ifa->ifa_scope;
1615 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1616 
1617 	if (args->netnsid >= 0 &&
1618 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1619 		goto nla_put_failure;
1620 
1621 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1622 		preferred = ifa->ifa_preferred_lft;
1623 		valid = ifa->ifa_valid_lft;
1624 		if (preferred != INFINITY_LIFE_TIME) {
1625 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1626 
1627 			if (preferred > tval)
1628 				preferred -= tval;
1629 			else
1630 				preferred = 0;
1631 			if (valid != INFINITY_LIFE_TIME) {
1632 				if (valid > tval)
1633 					valid -= tval;
1634 				else
1635 					valid = 0;
1636 			}
1637 		}
1638 	} else {
1639 		preferred = INFINITY_LIFE_TIME;
1640 		valid = INFINITY_LIFE_TIME;
1641 	}
1642 	if ((ifa->ifa_address &&
1643 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1644 	    (ifa->ifa_local &&
1645 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1646 	    (ifa->ifa_broadcast &&
1647 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1648 	    (ifa->ifa_label[0] &&
1649 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1650 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1651 	    (ifa->ifa_rt_priority &&
1652 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1653 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1654 			  preferred, valid))
1655 		goto nla_put_failure;
1656 
1657 	nlmsg_end(skb, nlh);
1658 	return 0;
1659 
1660 nla_put_failure:
1661 	nlmsg_cancel(skb, nlh);
1662 	return -EMSGSIZE;
1663 }
1664 
1665 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1666 				      struct inet_fill_args *fillargs,
1667 				      struct net **tgt_net, struct sock *sk,
1668 				      struct netlink_callback *cb)
1669 {
1670 	struct netlink_ext_ack *extack = cb->extack;
1671 	struct nlattr *tb[IFA_MAX+1];
1672 	struct ifaddrmsg *ifm;
1673 	int err, i;
1674 
1675 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1676 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1677 		return -EINVAL;
1678 	}
1679 
1680 	ifm = nlmsg_data(nlh);
1681 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1682 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1683 		return -EINVAL;
1684 	}
1685 
1686 	fillargs->ifindex = ifm->ifa_index;
1687 	if (fillargs->ifindex) {
1688 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1689 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1690 	}
1691 
1692 	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1693 				 ifa_ipv4_policy, extack);
1694 	if (err < 0)
1695 		return err;
1696 
1697 	for (i = 0; i <= IFA_MAX; ++i) {
1698 		if (!tb[i])
1699 			continue;
1700 
1701 		if (i == IFA_TARGET_NETNSID) {
1702 			struct net *net;
1703 
1704 			fillargs->netnsid = nla_get_s32(tb[i]);
1705 
1706 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1707 			if (IS_ERR(net)) {
1708 				fillargs->netnsid = -1;
1709 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1710 				return PTR_ERR(net);
1711 			}
1712 			*tgt_net = net;
1713 		} else {
1714 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1715 			return -EINVAL;
1716 		}
1717 	}
1718 
1719 	return 0;
1720 }
1721 
1722 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1723 			    struct netlink_callback *cb, int s_ip_idx,
1724 			    struct inet_fill_args *fillargs)
1725 {
1726 	struct in_ifaddr *ifa;
1727 	int ip_idx = 0;
1728 	int err;
1729 
1730 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1731 		if (ip_idx < s_ip_idx)
1732 			continue;
1733 
1734 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1735 		if (err < 0)
1736 			goto done;
1737 
1738 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1739 	}
1740 	err = 0;
1741 
1742 done:
1743 	cb->args[2] = ip_idx;
1744 
1745 	return err;
1746 }
1747 
1748 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1749 {
1750 	const struct nlmsghdr *nlh = cb->nlh;
1751 	struct inet_fill_args fillargs = {
1752 		.portid = NETLINK_CB(cb->skb).portid,
1753 		.seq = nlh->nlmsg_seq,
1754 		.event = RTM_NEWADDR,
1755 		.flags = NLM_F_MULTI,
1756 		.netnsid = -1,
1757 	};
1758 	struct net *net = sock_net(skb->sk);
1759 	struct net *tgt_net = net;
1760 	int h, s_h;
1761 	int idx, s_idx;
1762 	int s_ip_idx;
1763 	struct net_device *dev;
1764 	struct in_device *in_dev;
1765 	struct hlist_head *head;
1766 	int err = 0;
1767 
1768 	s_h = cb->args[0];
1769 	s_idx = idx = cb->args[1];
1770 	s_ip_idx = cb->args[2];
1771 
1772 	if (cb->strict_check) {
1773 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1774 						 skb->sk, cb);
1775 		if (err < 0)
1776 			goto put_tgt_net;
1777 
1778 		err = 0;
1779 		if (fillargs.ifindex) {
1780 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1781 			if (!dev) {
1782 				err = -ENODEV;
1783 				goto put_tgt_net;
1784 			}
1785 
1786 			in_dev = __in_dev_get_rtnl(dev);
1787 			if (in_dev) {
1788 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1789 						       &fillargs);
1790 			}
1791 			goto put_tgt_net;
1792 		}
1793 	}
1794 
1795 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1796 		idx = 0;
1797 		head = &tgt_net->dev_index_head[h];
1798 		rcu_read_lock();
1799 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1800 			  tgt_net->dev_base_seq;
1801 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1802 			if (idx < s_idx)
1803 				goto cont;
1804 			if (h > s_h || idx > s_idx)
1805 				s_ip_idx = 0;
1806 			in_dev = __in_dev_get_rcu(dev);
1807 			if (!in_dev)
1808 				goto cont;
1809 
1810 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1811 					       &fillargs);
1812 			if (err < 0) {
1813 				rcu_read_unlock();
1814 				goto done;
1815 			}
1816 cont:
1817 			idx++;
1818 		}
1819 		rcu_read_unlock();
1820 	}
1821 
1822 done:
1823 	cb->args[0] = h;
1824 	cb->args[1] = idx;
1825 put_tgt_net:
1826 	if (fillargs.netnsid >= 0)
1827 		put_net(tgt_net);
1828 
1829 	return skb->len ? : err;
1830 }
1831 
1832 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1833 		      u32 portid)
1834 {
1835 	struct inet_fill_args fillargs = {
1836 		.portid = portid,
1837 		.seq = nlh ? nlh->nlmsg_seq : 0,
1838 		.event = event,
1839 		.flags = 0,
1840 		.netnsid = -1,
1841 	};
1842 	struct sk_buff *skb;
1843 	int err = -ENOBUFS;
1844 	struct net *net;
1845 
1846 	net = dev_net(ifa->ifa_dev->dev);
1847 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1848 	if (!skb)
1849 		goto errout;
1850 
1851 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1852 	if (err < 0) {
1853 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1854 		WARN_ON(err == -EMSGSIZE);
1855 		kfree_skb(skb);
1856 		goto errout;
1857 	}
1858 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1859 	return;
1860 errout:
1861 	if (err < 0)
1862 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1863 }
1864 
1865 static size_t inet_get_link_af_size(const struct net_device *dev,
1866 				    u32 ext_filter_mask)
1867 {
1868 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1869 
1870 	if (!in_dev)
1871 		return 0;
1872 
1873 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1874 }
1875 
1876 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1877 			     u32 ext_filter_mask)
1878 {
1879 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1880 	struct nlattr *nla;
1881 	int i;
1882 
1883 	if (!in_dev)
1884 		return -ENODATA;
1885 
1886 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1887 	if (!nla)
1888 		return -EMSGSIZE;
1889 
1890 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1891 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1892 
1893 	return 0;
1894 }
1895 
1896 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1897 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1898 };
1899 
1900 static int inet_validate_link_af(const struct net_device *dev,
1901 				 const struct nlattr *nla)
1902 {
1903 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1904 	int err, rem;
1905 
1906 	if (dev && !__in_dev_get_rcu(dev))
1907 		return -EAFNOSUPPORT;
1908 
1909 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1910 	if (err < 0)
1911 		return err;
1912 
1913 	if (tb[IFLA_INET_CONF]) {
1914 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1915 			int cfgid = nla_type(a);
1916 
1917 			if (nla_len(a) < 4)
1918 				return -EINVAL;
1919 
1920 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1921 				return -EINVAL;
1922 		}
1923 	}
1924 
1925 	return 0;
1926 }
1927 
1928 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1929 {
1930 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1931 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1932 	int rem;
1933 
1934 	if (!in_dev)
1935 		return -EAFNOSUPPORT;
1936 
1937 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1938 		BUG();
1939 
1940 	if (tb[IFLA_INET_CONF]) {
1941 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1942 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1943 	}
1944 
1945 	return 0;
1946 }
1947 
1948 static int inet_netconf_msgsize_devconf(int type)
1949 {
1950 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1951 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1952 	bool all = false;
1953 
1954 	if (type == NETCONFA_ALL)
1955 		all = true;
1956 
1957 	if (all || type == NETCONFA_FORWARDING)
1958 		size += nla_total_size(4);
1959 	if (all || type == NETCONFA_RP_FILTER)
1960 		size += nla_total_size(4);
1961 	if (all || type == NETCONFA_MC_FORWARDING)
1962 		size += nla_total_size(4);
1963 	if (all || type == NETCONFA_BC_FORWARDING)
1964 		size += nla_total_size(4);
1965 	if (all || type == NETCONFA_PROXY_NEIGH)
1966 		size += nla_total_size(4);
1967 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1968 		size += nla_total_size(4);
1969 
1970 	return size;
1971 }
1972 
1973 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1974 				     struct ipv4_devconf *devconf, u32 portid,
1975 				     u32 seq, int event, unsigned int flags,
1976 				     int type)
1977 {
1978 	struct nlmsghdr  *nlh;
1979 	struct netconfmsg *ncm;
1980 	bool all = false;
1981 
1982 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1983 			flags);
1984 	if (!nlh)
1985 		return -EMSGSIZE;
1986 
1987 	if (type == NETCONFA_ALL)
1988 		all = true;
1989 
1990 	ncm = nlmsg_data(nlh);
1991 	ncm->ncm_family = AF_INET;
1992 
1993 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1994 		goto nla_put_failure;
1995 
1996 	if (!devconf)
1997 		goto out;
1998 
1999 	if ((all || type == NETCONFA_FORWARDING) &&
2000 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2001 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2002 		goto nla_put_failure;
2003 	if ((all || type == NETCONFA_RP_FILTER) &&
2004 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2005 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2006 		goto nla_put_failure;
2007 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2008 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2009 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2010 		goto nla_put_failure;
2011 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2012 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2013 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2014 		goto nla_put_failure;
2015 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2016 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2017 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2018 		goto nla_put_failure;
2019 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2020 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2021 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2022 		goto nla_put_failure;
2023 
2024 out:
2025 	nlmsg_end(skb, nlh);
2026 	return 0;
2027 
2028 nla_put_failure:
2029 	nlmsg_cancel(skb, nlh);
2030 	return -EMSGSIZE;
2031 }
2032 
2033 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2034 				 int ifindex, struct ipv4_devconf *devconf)
2035 {
2036 	struct sk_buff *skb;
2037 	int err = -ENOBUFS;
2038 
2039 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2040 	if (!skb)
2041 		goto errout;
2042 
2043 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2044 					event, 0, type);
2045 	if (err < 0) {
2046 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2047 		WARN_ON(err == -EMSGSIZE);
2048 		kfree_skb(skb);
2049 		goto errout;
2050 	}
2051 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2052 	return;
2053 errout:
2054 	if (err < 0)
2055 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2056 }
2057 
2058 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2059 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2060 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2061 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2062 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2063 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2064 };
2065 
2066 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2067 				      const struct nlmsghdr *nlh,
2068 				      struct nlattr **tb,
2069 				      struct netlink_ext_ack *extack)
2070 {
2071 	int i, err;
2072 
2073 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2074 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2075 		return -EINVAL;
2076 	}
2077 
2078 	if (!netlink_strict_get_check(skb))
2079 		return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
2080 				   NETCONFA_MAX, devconf_ipv4_policy, extack);
2081 
2082 	err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
2083 				 NETCONFA_MAX, devconf_ipv4_policy, extack);
2084 	if (err)
2085 		return err;
2086 
2087 	for (i = 0; i <= NETCONFA_MAX; i++) {
2088 		if (!tb[i])
2089 			continue;
2090 
2091 		switch (i) {
2092 		case NETCONFA_IFINDEX:
2093 			break;
2094 		default:
2095 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2096 			return -EINVAL;
2097 		}
2098 	}
2099 
2100 	return 0;
2101 }
2102 
2103 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2104 				    struct nlmsghdr *nlh,
2105 				    struct netlink_ext_ack *extack)
2106 {
2107 	struct net *net = sock_net(in_skb->sk);
2108 	struct nlattr *tb[NETCONFA_MAX+1];
2109 	struct sk_buff *skb;
2110 	struct ipv4_devconf *devconf;
2111 	struct in_device *in_dev;
2112 	struct net_device *dev;
2113 	int ifindex;
2114 	int err;
2115 
2116 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2117 	if (err)
2118 		goto errout;
2119 
2120 	err = -EINVAL;
2121 	if (!tb[NETCONFA_IFINDEX])
2122 		goto errout;
2123 
2124 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2125 	switch (ifindex) {
2126 	case NETCONFA_IFINDEX_ALL:
2127 		devconf = net->ipv4.devconf_all;
2128 		break;
2129 	case NETCONFA_IFINDEX_DEFAULT:
2130 		devconf = net->ipv4.devconf_dflt;
2131 		break;
2132 	default:
2133 		dev = __dev_get_by_index(net, ifindex);
2134 		if (!dev)
2135 			goto errout;
2136 		in_dev = __in_dev_get_rtnl(dev);
2137 		if (!in_dev)
2138 			goto errout;
2139 		devconf = &in_dev->cnf;
2140 		break;
2141 	}
2142 
2143 	err = -ENOBUFS;
2144 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2145 	if (!skb)
2146 		goto errout;
2147 
2148 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2149 					NETLINK_CB(in_skb).portid,
2150 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2151 					NETCONFA_ALL);
2152 	if (err < 0) {
2153 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2154 		WARN_ON(err == -EMSGSIZE);
2155 		kfree_skb(skb);
2156 		goto errout;
2157 	}
2158 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2159 errout:
2160 	return err;
2161 }
2162 
2163 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2164 				     struct netlink_callback *cb)
2165 {
2166 	const struct nlmsghdr *nlh = cb->nlh;
2167 	struct net *net = sock_net(skb->sk);
2168 	int h, s_h;
2169 	int idx, s_idx;
2170 	struct net_device *dev;
2171 	struct in_device *in_dev;
2172 	struct hlist_head *head;
2173 
2174 	if (cb->strict_check) {
2175 		struct netlink_ext_ack *extack = cb->extack;
2176 		struct netconfmsg *ncm;
2177 
2178 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2179 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2180 			return -EINVAL;
2181 		}
2182 
2183 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2184 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2185 			return -EINVAL;
2186 		}
2187 	}
2188 
2189 	s_h = cb->args[0];
2190 	s_idx = idx = cb->args[1];
2191 
2192 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2193 		idx = 0;
2194 		head = &net->dev_index_head[h];
2195 		rcu_read_lock();
2196 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2197 			  net->dev_base_seq;
2198 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2199 			if (idx < s_idx)
2200 				goto cont;
2201 			in_dev = __in_dev_get_rcu(dev);
2202 			if (!in_dev)
2203 				goto cont;
2204 
2205 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2206 						      &in_dev->cnf,
2207 						      NETLINK_CB(cb->skb).portid,
2208 						      nlh->nlmsg_seq,
2209 						      RTM_NEWNETCONF,
2210 						      NLM_F_MULTI,
2211 						      NETCONFA_ALL) < 0) {
2212 				rcu_read_unlock();
2213 				goto done;
2214 			}
2215 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2216 cont:
2217 			idx++;
2218 		}
2219 		rcu_read_unlock();
2220 	}
2221 	if (h == NETDEV_HASHENTRIES) {
2222 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2223 					      net->ipv4.devconf_all,
2224 					      NETLINK_CB(cb->skb).portid,
2225 					      nlh->nlmsg_seq,
2226 					      RTM_NEWNETCONF, NLM_F_MULTI,
2227 					      NETCONFA_ALL) < 0)
2228 			goto done;
2229 		else
2230 			h++;
2231 	}
2232 	if (h == NETDEV_HASHENTRIES + 1) {
2233 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2234 					      net->ipv4.devconf_dflt,
2235 					      NETLINK_CB(cb->skb).portid,
2236 					      nlh->nlmsg_seq,
2237 					      RTM_NEWNETCONF, NLM_F_MULTI,
2238 					      NETCONFA_ALL) < 0)
2239 			goto done;
2240 		else
2241 			h++;
2242 	}
2243 done:
2244 	cb->args[0] = h;
2245 	cb->args[1] = idx;
2246 
2247 	return skb->len;
2248 }
2249 
2250 #ifdef CONFIG_SYSCTL
2251 
2252 static void devinet_copy_dflt_conf(struct net *net, int i)
2253 {
2254 	struct net_device *dev;
2255 
2256 	rcu_read_lock();
2257 	for_each_netdev_rcu(net, dev) {
2258 		struct in_device *in_dev;
2259 
2260 		in_dev = __in_dev_get_rcu(dev);
2261 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2262 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2263 	}
2264 	rcu_read_unlock();
2265 }
2266 
2267 /* called with RTNL locked */
2268 static void inet_forward_change(struct net *net)
2269 {
2270 	struct net_device *dev;
2271 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2272 
2273 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2274 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2275 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2276 				    NETCONFA_FORWARDING,
2277 				    NETCONFA_IFINDEX_ALL,
2278 				    net->ipv4.devconf_all);
2279 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2280 				    NETCONFA_FORWARDING,
2281 				    NETCONFA_IFINDEX_DEFAULT,
2282 				    net->ipv4.devconf_dflt);
2283 
2284 	for_each_netdev(net, dev) {
2285 		struct in_device *in_dev;
2286 
2287 		if (on)
2288 			dev_disable_lro(dev);
2289 
2290 		in_dev = __in_dev_get_rtnl(dev);
2291 		if (in_dev) {
2292 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2293 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2294 						    NETCONFA_FORWARDING,
2295 						    dev->ifindex, &in_dev->cnf);
2296 		}
2297 	}
2298 }
2299 
2300 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2301 {
2302 	if (cnf == net->ipv4.devconf_dflt)
2303 		return NETCONFA_IFINDEX_DEFAULT;
2304 	else if (cnf == net->ipv4.devconf_all)
2305 		return NETCONFA_IFINDEX_ALL;
2306 	else {
2307 		struct in_device *idev
2308 			= container_of(cnf, struct in_device, cnf);
2309 		return idev->dev->ifindex;
2310 	}
2311 }
2312 
2313 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2314 			     void __user *buffer,
2315 			     size_t *lenp, loff_t *ppos)
2316 {
2317 	int old_value = *(int *)ctl->data;
2318 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2319 	int new_value = *(int *)ctl->data;
2320 
2321 	if (write) {
2322 		struct ipv4_devconf *cnf = ctl->extra1;
2323 		struct net *net = ctl->extra2;
2324 		int i = (int *)ctl->data - cnf->data;
2325 		int ifindex;
2326 
2327 		set_bit(i, cnf->state);
2328 
2329 		if (cnf == net->ipv4.devconf_dflt)
2330 			devinet_copy_dflt_conf(net, i);
2331 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2332 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2333 			if ((new_value == 0) && (old_value != 0))
2334 				rt_cache_flush(net);
2335 
2336 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2337 		    new_value != old_value)
2338 			rt_cache_flush(net);
2339 
2340 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2341 		    new_value != old_value) {
2342 			ifindex = devinet_conf_ifindex(net, cnf);
2343 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344 						    NETCONFA_RP_FILTER,
2345 						    ifindex, cnf);
2346 		}
2347 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2348 		    new_value != old_value) {
2349 			ifindex = devinet_conf_ifindex(net, cnf);
2350 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2351 						    NETCONFA_PROXY_NEIGH,
2352 						    ifindex, cnf);
2353 		}
2354 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2355 		    new_value != old_value) {
2356 			ifindex = devinet_conf_ifindex(net, cnf);
2357 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2359 						    ifindex, cnf);
2360 		}
2361 	}
2362 
2363 	return ret;
2364 }
2365 
2366 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2367 				  void __user *buffer,
2368 				  size_t *lenp, loff_t *ppos)
2369 {
2370 	int *valp = ctl->data;
2371 	int val = *valp;
2372 	loff_t pos = *ppos;
2373 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2374 
2375 	if (write && *valp != val) {
2376 		struct net *net = ctl->extra2;
2377 
2378 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2379 			if (!rtnl_trylock()) {
2380 				/* Restore the original values before restarting */
2381 				*valp = val;
2382 				*ppos = pos;
2383 				return restart_syscall();
2384 			}
2385 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2386 				inet_forward_change(net);
2387 			} else {
2388 				struct ipv4_devconf *cnf = ctl->extra1;
2389 				struct in_device *idev =
2390 					container_of(cnf, struct in_device, cnf);
2391 				if (*valp)
2392 					dev_disable_lro(idev->dev);
2393 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2394 							    NETCONFA_FORWARDING,
2395 							    idev->dev->ifindex,
2396 							    cnf);
2397 			}
2398 			rtnl_unlock();
2399 			rt_cache_flush(net);
2400 		} else
2401 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2402 						    NETCONFA_FORWARDING,
2403 						    NETCONFA_IFINDEX_DEFAULT,
2404 						    net->ipv4.devconf_dflt);
2405 	}
2406 
2407 	return ret;
2408 }
2409 
2410 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2411 				void __user *buffer,
2412 				size_t *lenp, loff_t *ppos)
2413 {
2414 	int *valp = ctl->data;
2415 	int val = *valp;
2416 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2417 	struct net *net = ctl->extra2;
2418 
2419 	if (write && *valp != val)
2420 		rt_cache_flush(net);
2421 
2422 	return ret;
2423 }
2424 
2425 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2426 	{ \
2427 		.procname	= name, \
2428 		.data		= ipv4_devconf.data + \
2429 				  IPV4_DEVCONF_ ## attr - 1, \
2430 		.maxlen		= sizeof(int), \
2431 		.mode		= mval, \
2432 		.proc_handler	= proc, \
2433 		.extra1		= &ipv4_devconf, \
2434 	}
2435 
2436 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2437 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2438 
2439 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2440 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2441 
2442 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2443 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2444 
2445 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2446 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2447 
2448 static struct devinet_sysctl_table {
2449 	struct ctl_table_header *sysctl_header;
2450 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2451 } devinet_sysctl = {
2452 	.devinet_vars = {
2453 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2454 					     devinet_sysctl_forward),
2455 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2456 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2457 
2458 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2459 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2460 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2461 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2462 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2463 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2464 					"accept_source_route"),
2465 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2466 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2467 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2468 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2469 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2470 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2471 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2472 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2473 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2474 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2475 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2476 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2477 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2478 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2479 					"force_igmp_version"),
2480 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2481 					"igmpv2_unsolicited_report_interval"),
2482 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2483 					"igmpv3_unsolicited_report_interval"),
2484 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2485 					"ignore_routes_with_linkdown"),
2486 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2487 					"drop_gratuitous_arp"),
2488 
2489 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2490 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2491 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2492 					      "promote_secondaries"),
2493 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2494 					      "route_localnet"),
2495 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2496 					      "drop_unicast_in_l2_multicast"),
2497 	},
2498 };
2499 
2500 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2501 				     int ifindex, struct ipv4_devconf *p)
2502 {
2503 	int i;
2504 	struct devinet_sysctl_table *t;
2505 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2506 
2507 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2508 	if (!t)
2509 		goto out;
2510 
2511 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2512 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2513 		t->devinet_vars[i].extra1 = p;
2514 		t->devinet_vars[i].extra2 = net;
2515 	}
2516 
2517 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2518 
2519 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2520 	if (!t->sysctl_header)
2521 		goto free;
2522 
2523 	p->sysctl = t;
2524 
2525 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2526 				    ifindex, p);
2527 	return 0;
2528 
2529 free:
2530 	kfree(t);
2531 out:
2532 	return -ENOBUFS;
2533 }
2534 
2535 static void __devinet_sysctl_unregister(struct net *net,
2536 					struct ipv4_devconf *cnf, int ifindex)
2537 {
2538 	struct devinet_sysctl_table *t = cnf->sysctl;
2539 
2540 	if (t) {
2541 		cnf->sysctl = NULL;
2542 		unregister_net_sysctl_table(t->sysctl_header);
2543 		kfree(t);
2544 	}
2545 
2546 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2547 }
2548 
2549 static int devinet_sysctl_register(struct in_device *idev)
2550 {
2551 	int err;
2552 
2553 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2554 		return -EINVAL;
2555 
2556 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2557 	if (err)
2558 		return err;
2559 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2560 					idev->dev->ifindex, &idev->cnf);
2561 	if (err)
2562 		neigh_sysctl_unregister(idev->arp_parms);
2563 	return err;
2564 }
2565 
2566 static void devinet_sysctl_unregister(struct in_device *idev)
2567 {
2568 	struct net *net = dev_net(idev->dev);
2569 
2570 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2571 	neigh_sysctl_unregister(idev->arp_parms);
2572 }
2573 
2574 static struct ctl_table ctl_forward_entry[] = {
2575 	{
2576 		.procname	= "ip_forward",
2577 		.data		= &ipv4_devconf.data[
2578 					IPV4_DEVCONF_FORWARDING - 1],
2579 		.maxlen		= sizeof(int),
2580 		.mode		= 0644,
2581 		.proc_handler	= devinet_sysctl_forward,
2582 		.extra1		= &ipv4_devconf,
2583 		.extra2		= &init_net,
2584 	},
2585 	{ },
2586 };
2587 #endif
2588 
2589 static __net_init int devinet_init_net(struct net *net)
2590 {
2591 	int err;
2592 	struct ipv4_devconf *all, *dflt;
2593 #ifdef CONFIG_SYSCTL
2594 	struct ctl_table *tbl;
2595 	struct ctl_table_header *forw_hdr;
2596 #endif
2597 
2598 	err = -ENOMEM;
2599 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2600 	if (!all)
2601 		goto err_alloc_all;
2602 
2603 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2604 	if (!dflt)
2605 		goto err_alloc_dflt;
2606 
2607 #ifdef CONFIG_SYSCTL
2608 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2609 	if (!tbl)
2610 		goto err_alloc_ctl;
2611 
2612 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2613 	tbl[0].extra1 = all;
2614 	tbl[0].extra2 = net;
2615 #endif
2616 
2617 	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
2618 	     sysctl_devconf_inherit_init_net != 2) &&
2619 	    !net_eq(net, &init_net)) {
2620 		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
2621 		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
2622 	}
2623 
2624 #ifdef CONFIG_SYSCTL
2625 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2626 	if (err < 0)
2627 		goto err_reg_all;
2628 
2629 	err = __devinet_sysctl_register(net, "default",
2630 					NETCONFA_IFINDEX_DEFAULT, dflt);
2631 	if (err < 0)
2632 		goto err_reg_dflt;
2633 
2634 	err = -ENOMEM;
2635 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2636 	if (!forw_hdr)
2637 		goto err_reg_ctl;
2638 	net->ipv4.forw_hdr = forw_hdr;
2639 #endif
2640 
2641 	net->ipv4.devconf_all = all;
2642 	net->ipv4.devconf_dflt = dflt;
2643 	return 0;
2644 
2645 #ifdef CONFIG_SYSCTL
2646 err_reg_ctl:
2647 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2648 err_reg_dflt:
2649 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2650 err_reg_all:
2651 	kfree(tbl);
2652 err_alloc_ctl:
2653 #endif
2654 	kfree(dflt);
2655 err_alloc_dflt:
2656 	kfree(all);
2657 err_alloc_all:
2658 	return err;
2659 }
2660 
2661 static __net_exit void devinet_exit_net(struct net *net)
2662 {
2663 #ifdef CONFIG_SYSCTL
2664 	struct ctl_table *tbl;
2665 
2666 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2667 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2668 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2669 				    NETCONFA_IFINDEX_DEFAULT);
2670 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2671 				    NETCONFA_IFINDEX_ALL);
2672 	kfree(tbl);
2673 #endif
2674 	kfree(net->ipv4.devconf_dflt);
2675 	kfree(net->ipv4.devconf_all);
2676 }
2677 
2678 static __net_initdata struct pernet_operations devinet_ops = {
2679 	.init = devinet_init_net,
2680 	.exit = devinet_exit_net,
2681 };
2682 
2683 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2684 	.family		  = AF_INET,
2685 	.fill_link_af	  = inet_fill_link_af,
2686 	.get_link_af_size = inet_get_link_af_size,
2687 	.validate_link_af = inet_validate_link_af,
2688 	.set_link_af	  = inet_set_link_af,
2689 };
2690 
2691 void __init devinet_init(void)
2692 {
2693 	int i;
2694 
2695 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2696 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2697 
2698 	register_pernet_subsys(&devinet_ops);
2699 
2700 	register_gifconf(PF_INET, inet_gifconf);
2701 	register_netdevice_notifier(&ip_netdev_notifier);
2702 
2703 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2704 
2705 	rtnl_af_register(&inet_af_ops);
2706 
2707 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2708 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2709 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2710 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2711 		      inet_netconf_dump_devconf, 0);
2712 }
2713