xref: /openbmc/linux/net/ipv4/devinet.c (revision 22fc4c4c9fd60427bcda00878cee94e7622cfa7a)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
/*
 * Statically initialised IPv4 device configuration table.
 * Slots of .data are addressed as (IPV4_DEVCONF_<name> - 1); every slot
 * not listed here starts out as zero.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
79 
/*
 * Statically initialised "default" IPv4 device configuration table.
 * Same layout as ipv4_devconf; the only difference in initial values is
 * that ACCEPT_SOURCE_ROUTE is also enabled here.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
91 
/*
 * Access configuration slot @attr in the per-namespace default devconf
 * table of @net (net->ipv4.devconf_dflt).  The expansion is whatever
 * IPV4_DEVCONF() yields for that table.
 *
 * @net is parenthesised so that an expression argument (for example a
 * conditional or a cast) binds correctly before the -> operator.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR handlers in this file.  Attribute types not listed here are
 * left with a zeroed policy entry.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	/* label is a string of at most IFNAMSIZ - 1 bytes */
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
};
105 
/*
 * Bundle of parameters for building an address netlink message.
 * NOTE(review): the consumers of this struct are outside the visible part
 * of the file; the field meanings below are inferred from their names and
 * should be confirmed against the fill/dump code.
 */
struct inet_fill_args {
	u32 portid;		/* presumably the netlink port id of the requester */
	u32 seq;		/* presumably the netlink sequence number */
	int event;		/* presumably the RTM_* message type */
	unsigned int flags;	/* presumably NLM_F_* message flags */
	int netnsid;		/* presumably the target netns id, if any */
	int ifindex;		/* presumably an interface index filter */
};
114 
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of configured addresses (struct in_ifaddr linked through
 * their ->hash member), indexed by inet_addr_hash().  Entries are added and
 * removed with the RCU hlist helpers while RTNL is held.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119 
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122 	u32 val = (__force u32) addr ^ net_hash_mix(net);
123 
124 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126 
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
130 
131 	ASSERT_RTNL();
132 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134 
135 static void inet_hash_remove(struct in_ifaddr *ifa)
136 {
137 	ASSERT_RTNL();
138 	hlist_del_init_rcu(&ifa->hash);
139 }
140 
141 /**
142  * __ip_dev_find - find the first device with a given source address.
143  * @net: the net namespace
144  * @addr: the source address
145  * @devref: if true, take a reference on the found device
146  *
147  * If a caller uses devref=false, it should be protected by RCU, or RTNL
148  */
149 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
150 {
151 	struct net_device *result = NULL;
152 	struct in_ifaddr *ifa;
153 
154 	rcu_read_lock();
155 	ifa = inet_lookup_ifaddr_rcu(net, addr);
156 	if (!ifa) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	} else {
170 		result = ifa->ifa_dev->dev;
171 	}
172 	if (result && devref)
173 		dev_hold(result);
174 	rcu_read_unlock();
175 	return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178 
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182 	u32 hash = inet_addr_hash(net, addr);
183 	struct in_ifaddr *ifa;
184 
185 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186 		if (ifa->ifa_local == addr &&
187 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
188 			return ifa;
189 
190 	return NULL;
191 }
192 
/* Forward declaration: used by the add/delete paths below to emit
 * RTM_NEWADDR / RTM_DELADDR notifications.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * inetaddr_chain is invoked with NETDEV_UP / NETDEV_DOWN when an address
 * is inserted or removed; inetaddr_validator_chain is consulted before an
 * insertion is committed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL, registration is a no-op that always succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
/* Without CONFIG_SYSCTL there is nothing to unregister. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
211 
212 /* Locks all the inet devices. */
213 
214 static struct in_ifaddr *inet_alloc_ifa(void)
215 {
216 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
217 }
218 
219 static void inet_rcu_free_ifa(struct rcu_head *head)
220 {
221 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
222 	if (ifa->ifa_dev)
223 		in_dev_put(ifa->ifa_dev);
224 	kfree(ifa);
225 }
226 
227 static void inet_free_ifa(struct in_ifaddr *ifa)
228 {
229 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
230 }
231 
232 void in_dev_finish_destroy(struct in_device *idev)
233 {
234 	struct net_device *dev = idev->dev;
235 
236 	WARN_ON(idev->ifa_list);
237 	WARN_ON(idev->mc_list);
238 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
239 #ifdef NET_REFCNT_DEBUG
240 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
241 #endif
242 	dev_put(dev);
243 	if (!idev->dead)
244 		pr_err("Freeing alive in_device %p\n", idev);
245 	else
246 		kfree(idev);
247 }
248 EXPORT_SYMBOL(in_dev_finish_destroy);
249 
250 static struct in_device *inetdev_init(struct net_device *dev)
251 {
252 	struct in_device *in_dev;
253 	int err = -ENOMEM;
254 
255 	ASSERT_RTNL();
256 
257 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
258 	if (!in_dev)
259 		goto out;
260 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
261 			sizeof(in_dev->cnf));
262 	in_dev->cnf.sysctl = NULL;
263 	in_dev->dev = dev;
264 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
265 	if (!in_dev->arp_parms)
266 		goto out_kfree;
267 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
268 		dev_disable_lro(dev);
269 	/* Reference in_dev->dev */
270 	dev_hold(dev);
271 	/* Account for reference dev->ip_ptr (below) */
272 	refcount_set(&in_dev->refcnt, 1);
273 
274 	err = devinet_sysctl_register(in_dev);
275 	if (err) {
276 		in_dev->dead = 1;
277 		in_dev_put(in_dev);
278 		in_dev = NULL;
279 		goto out;
280 	}
281 	ip_mc_init_dev(in_dev);
282 	if (dev->flags & IFF_UP)
283 		ip_mc_up(in_dev);
284 
285 	/* we can receive as soon as ip_ptr is set -- do this last */
286 	rcu_assign_pointer(dev->ip_ptr, in_dev);
287 out:
288 	return in_dev ?: ERR_PTR(err);
289 out_kfree:
290 	kfree(in_dev);
291 	in_dev = NULL;
292 	goto out;
293 }
294 
295 static void in_dev_rcu_put(struct rcu_head *head)
296 {
297 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
298 	in_dev_put(idev);
299 }
300 
301 static void inetdev_destroy(struct in_device *in_dev)
302 {
303 	struct in_ifaddr *ifa;
304 	struct net_device *dev;
305 
306 	ASSERT_RTNL();
307 
308 	dev = in_dev->dev;
309 
310 	in_dev->dead = 1;
311 
312 	ip_mc_destroy_dev(in_dev);
313 
314 	while ((ifa = in_dev->ifa_list) != NULL) {
315 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
316 		inet_free_ifa(ifa);
317 	}
318 
319 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
320 
321 	devinet_sysctl_unregister(in_dev);
322 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
323 	arp_ifdown(dev);
324 
325 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
326 }
327 
328 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
329 {
330 	rcu_read_lock();
331 	for_primary_ifa(in_dev) {
332 		if (inet_ifa_match(a, ifa)) {
333 			if (!b || inet_ifa_match(b, ifa)) {
334 				rcu_read_unlock();
335 				return 1;
336 			}
337 		}
338 	} endfor_ifa(in_dev);
339 	rcu_read_unlock();
340 	return 0;
341 }
342 
/*
 * Remove one address from a device's address list.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the ->ifa_next slot or list head) that
 *           currently points at the address to remove
 * @destroy: when non-zero the removed entry is handed to inet_free_ifa()
 * @nlh:     netlink request passed through to rtmsg_ifa(), may be NULL
 * @portid:  netlink port id passed through to rtmsg_ifa()
 *
 * When the removed address is a primary one, the secondaries of the same
 * subnet are either deleted as well or, if secondary promotion is enabled
 * on the device, the first of them is promoted to primary.  Both steps are
 * skipped when the device is already marked dead.
 *
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary after which a promoted
			 * address may later be re-linked.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1's
			 * subnet; remember the last one skipped.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: drop this secondary too. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Saved before re-linking: the walk below must start at the
		 * entry that originally followed the promoted address.
		 */
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* Move the promoted entry right behind last_prim. */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes of the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
446 
447 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
448 			 int destroy)
449 {
450 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
451 }
452 
/* Defined further down; declared here so the work item can reference it. */
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work item running check_lifetime(); it is (re)queued whenever an
 * address is inserted or its lifetime changes.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
456 
457 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
458 			     u32 portid, struct netlink_ext_ack *extack)
459 {
460 	struct in_device *in_dev = ifa->ifa_dev;
461 	struct in_ifaddr *ifa1, **ifap, **last_primary;
462 	struct in_validator_info ivi;
463 	int ret;
464 
465 	ASSERT_RTNL();
466 
467 	if (!ifa->ifa_local) {
468 		inet_free_ifa(ifa);
469 		return 0;
470 	}
471 
472 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
473 	last_primary = &in_dev->ifa_list;
474 
475 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
476 	     ifap = &ifa1->ifa_next) {
477 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
478 		    ifa->ifa_scope <= ifa1->ifa_scope)
479 			last_primary = &ifa1->ifa_next;
480 		if (ifa1->ifa_mask == ifa->ifa_mask &&
481 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
482 			if (ifa1->ifa_local == ifa->ifa_local) {
483 				inet_free_ifa(ifa);
484 				return -EEXIST;
485 			}
486 			if (ifa1->ifa_scope != ifa->ifa_scope) {
487 				inet_free_ifa(ifa);
488 				return -EINVAL;
489 			}
490 			ifa->ifa_flags |= IFA_F_SECONDARY;
491 		}
492 	}
493 
494 	/* Allow any devices that wish to register ifaddr validtors to weigh
495 	 * in now, before changes are committed.  The rntl lock is serializing
496 	 * access here, so the state should not change between a validator call
497 	 * and a final notify on commit.  This isn't invoked on promotion under
498 	 * the assumption that validators are checking the address itself, and
499 	 * not the flags.
500 	 */
501 	ivi.ivi_addr = ifa->ifa_address;
502 	ivi.ivi_dev = ifa->ifa_dev;
503 	ivi.extack = extack;
504 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
505 					   NETDEV_UP, &ivi);
506 	ret = notifier_to_errno(ret);
507 	if (ret) {
508 		inet_free_ifa(ifa);
509 		return ret;
510 	}
511 
512 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
513 		prandom_seed((__force u32) ifa->ifa_local);
514 		ifap = last_primary;
515 	}
516 
517 	ifa->ifa_next = *ifap;
518 	*ifap = ifa;
519 
520 	inet_hash_insert(dev_net(in_dev->dev), ifa);
521 
522 	cancel_delayed_work(&check_lifetime_work);
523 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
524 
525 	/* Send message first, then call notifier.
526 	   Notifier will trigger FIB update, so that
527 	   listeners of netlink will know about new ifaddr */
528 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
529 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
530 
531 	return 0;
532 }
533 
534 static int inet_insert_ifa(struct in_ifaddr *ifa)
535 {
536 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
537 }
538 
539 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
540 {
541 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
542 
543 	ASSERT_RTNL();
544 
545 	if (!in_dev) {
546 		inet_free_ifa(ifa);
547 		return -ENOBUFS;
548 	}
549 	ipv4_devconf_setall(in_dev);
550 	neigh_parms_data_state_setall(in_dev->arp_parms);
551 	if (ifa->ifa_dev != in_dev) {
552 		WARN_ON(ifa->ifa_dev);
553 		in_dev_hold(in_dev);
554 		ifa->ifa_dev = in_dev;
555 	}
556 	if (ipv4_is_loopback(ifa->ifa_local))
557 		ifa->ifa_scope = RT_SCOPE_HOST;
558 	return inet_insert_ifa(ifa);
559 }
560 
561 /* Caller must hold RCU or RTNL :
562  * We dont take a reference on found in_device
563  */
564 struct in_device *inetdev_by_index(struct net *net, int ifindex)
565 {
566 	struct net_device *dev;
567 	struct in_device *in_dev = NULL;
568 
569 	rcu_read_lock();
570 	dev = dev_get_by_index_rcu(net, ifindex);
571 	if (dev)
572 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
573 	rcu_read_unlock();
574 	return in_dev;
575 }
576 EXPORT_SYMBOL(inetdev_by_index);
577 
578 /* Called only from RTNL semaphored context. No locks. */
579 
580 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
581 				    __be32 mask)
582 {
583 	ASSERT_RTNL();
584 
585 	for_primary_ifa(in_dev) {
586 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
587 			return ifa;
588 	} endfor_ifa(in_dev);
589 	return NULL;
590 }
591 
592 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
593 {
594 	struct ip_mreqn mreq = {
595 		.imr_multiaddr.s_addr = ifa->ifa_address,
596 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
597 	};
598 	int ret;
599 
600 	ASSERT_RTNL();
601 
602 	lock_sock(sk);
603 	if (join)
604 		ret = ip_mc_join_group(sk, &mreq);
605 	else
606 		ret = ip_mc_leave_group(sk, &mreq);
607 	release_sock(sk);
608 
609 	return ret;
610 }
611 
612 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
613 			    struct netlink_ext_ack *extack)
614 {
615 	struct net *net = sock_net(skb->sk);
616 	struct nlattr *tb[IFA_MAX+1];
617 	struct in_device *in_dev;
618 	struct ifaddrmsg *ifm;
619 	struct in_ifaddr *ifa, **ifap;
620 	int err = -EINVAL;
621 
622 	ASSERT_RTNL();
623 
624 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
625 			  extack);
626 	if (err < 0)
627 		goto errout;
628 
629 	ifm = nlmsg_data(nlh);
630 	in_dev = inetdev_by_index(net, ifm->ifa_index);
631 	if (!in_dev) {
632 		err = -ENODEV;
633 		goto errout;
634 	}
635 
636 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
637 	     ifap = &ifa->ifa_next) {
638 		if (tb[IFA_LOCAL] &&
639 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
640 			continue;
641 
642 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
643 			continue;
644 
645 		if (tb[IFA_ADDRESS] &&
646 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
647 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
648 			continue;
649 
650 		if (ipv4_is_multicast(ifa->ifa_address))
651 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
652 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
653 		return 0;
654 	}
655 
656 	err = -EADDRNOTAVAIL;
657 errout:
658 	return err;
659 }
660 
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF

/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every hash bucket is first scanned under the RCU read lock only, to find
 * out whether any non-permanent address has expired or must be marked
 * deprecated, and to compute the earliest time the next check is due.
 * Only buckets that need a change are then re-walked under RTNL, where
 * expired addresses are deleted and addresses past their preferred
 * lifetime get IFA_F_DEPRECATED plus an RTM_NEWADDR notification.
 * Finally the work item re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Default: check again after ADDR_CHECK_FREQUENCY at the latest. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan, no RTNL taken. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* Valid lifetime over: needs deletion. */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* Preferred lifetime over: next event is the
				 * end of the valid lifetime.
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* Still preferred: next event is the end of
				 * the preferred lifetime.
				 */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply changes to this bucket under RTNL.  The
		 * conditions are re-evaluated because the list may have
		 * changed since pass 1.
		 */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* Find the link pointing at ifa in its
				 * device list, as inet_del_ifa() needs it.
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
760 
761 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
762 			     __u32 prefered_lft)
763 {
764 	unsigned long timeout;
765 
766 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
767 
768 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
769 	if (addrconf_finite_timeout(timeout))
770 		ifa->ifa_valid_lft = timeout;
771 	else
772 		ifa->ifa_flags |= IFA_F_PERMANENT;
773 
774 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
775 	if (addrconf_finite_timeout(timeout)) {
776 		if (timeout == 0)
777 			ifa->ifa_flags |= IFA_F_DEPRECATED;
778 		ifa->ifa_preferred_lft = timeout;
779 	}
780 	ifa->ifa_tstamp = jiffies;
781 	if (!ifa->ifa_cstamp)
782 		ifa->ifa_cstamp = ifa->ifa_tstamp;
783 }
784 
785 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
786 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
787 				       struct netlink_ext_ack *extack)
788 {
789 	struct nlattr *tb[IFA_MAX+1];
790 	struct in_ifaddr *ifa;
791 	struct ifaddrmsg *ifm;
792 	struct net_device *dev;
793 	struct in_device *in_dev;
794 	int err;
795 
796 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
797 			  extack);
798 	if (err < 0)
799 		goto errout;
800 
801 	ifm = nlmsg_data(nlh);
802 	err = -EINVAL;
803 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
804 		goto errout;
805 
806 	dev = __dev_get_by_index(net, ifm->ifa_index);
807 	err = -ENODEV;
808 	if (!dev)
809 		goto errout;
810 
811 	in_dev = __in_dev_get_rtnl(dev);
812 	err = -ENOBUFS;
813 	if (!in_dev)
814 		goto errout;
815 
816 	ifa = inet_alloc_ifa();
817 	if (!ifa)
818 		/*
819 		 * A potential indev allocation can be left alive, it stays
820 		 * assigned to its device and is destroy with it.
821 		 */
822 		goto errout;
823 
824 	ipv4_devconf_setall(in_dev);
825 	neigh_parms_data_state_setall(in_dev->arp_parms);
826 	in_dev_hold(in_dev);
827 
828 	if (!tb[IFA_ADDRESS])
829 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
830 
831 	INIT_HLIST_NODE(&ifa->hash);
832 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
833 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
834 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
835 					 ifm->ifa_flags;
836 	ifa->ifa_scope = ifm->ifa_scope;
837 	ifa->ifa_dev = in_dev;
838 
839 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
840 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
841 
842 	if (tb[IFA_BROADCAST])
843 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
844 
845 	if (tb[IFA_LABEL])
846 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
847 	else
848 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
849 
850 	if (tb[IFA_RT_PRIORITY])
851 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
852 
853 	if (tb[IFA_CACHEINFO]) {
854 		struct ifa_cacheinfo *ci;
855 
856 		ci = nla_data(tb[IFA_CACHEINFO]);
857 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
858 			err = -EINVAL;
859 			goto errout_free;
860 		}
861 		*pvalid_lft = ci->ifa_valid;
862 		*pprefered_lft = ci->ifa_prefered;
863 	}
864 
865 	return ifa;
866 
867 errout_free:
868 	inet_free_ifa(ifa);
869 errout:
870 	return ERR_PTR(err);
871 }
872 
873 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
874 {
875 	struct in_device *in_dev = ifa->ifa_dev;
876 	struct in_ifaddr *ifa1, **ifap;
877 
878 	if (!ifa->ifa_local)
879 		return NULL;
880 
881 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
882 	     ifap = &ifa1->ifa_next) {
883 		if (ifa1->ifa_mask == ifa->ifa_mask &&
884 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
885 		    ifa1->ifa_local == ifa->ifa_local)
886 			return ifa1;
887 	}
888 	return NULL;
889 }
890 
891 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
892 			    struct netlink_ext_ack *extack)
893 {
894 	struct net *net = sock_net(skb->sk);
895 	struct in_ifaddr *ifa;
896 	struct in_ifaddr *ifa_existing;
897 	__u32 valid_lft = INFINITY_LIFE_TIME;
898 	__u32 prefered_lft = INFINITY_LIFE_TIME;
899 
900 	ASSERT_RTNL();
901 
902 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
903 	if (IS_ERR(ifa))
904 		return PTR_ERR(ifa);
905 
906 	ifa_existing = find_matching_ifa(ifa);
907 	if (!ifa_existing) {
908 		/* It would be best to check for !NLM_F_CREATE here but
909 		 * userspace already relies on not having to provide this.
910 		 */
911 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
912 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
913 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
914 					       true, ifa);
915 
916 			if (ret < 0) {
917 				inet_free_ifa(ifa);
918 				return ret;
919 			}
920 		}
921 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
922 					 extack);
923 	} else {
924 		u32 new_metric = ifa->ifa_rt_priority;
925 
926 		inet_free_ifa(ifa);
927 
928 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
929 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
930 			return -EEXIST;
931 		ifa = ifa_existing;
932 
933 		if (ifa->ifa_rt_priority != new_metric) {
934 			fib_modify_prefix_metric(ifa, new_metric);
935 			ifa->ifa_rt_priority = new_metric;
936 		}
937 
938 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
939 		cancel_delayed_work(&check_lifetime_work);
940 		queue_delayed_work(system_power_efficient_wq,
941 				&check_lifetime_work, 0);
942 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
943 	}
944 	return 0;
945 }
946 
947 /*
948  *	Determine a default network mask, based on the IP address.
949  */
950 
951 static int inet_abc_len(__be32 addr)
952 {
953 	int rc = -1;	/* Something else, probably a multicast. */
954 
955 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
956 		rc = 0;
957 	else {
958 		__u32 haddr = ntohl(addr);
959 		if (IN_CLASSA(haddr))
960 			rc = 8;
961 		else if (IN_CLASSB(haddr))
962 			rc = 16;
963 		else if (IN_CLASSC(haddr))
964 			rc = 24;
965 		else if (IN_CLASSE(haddr))
966 			rc = 32;
967 	}
968 
969 	return rc;
970 }
971 
972 
973 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
974 {
975 	struct sockaddr_in sin_orig;
976 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
977 	struct in_device *in_dev;
978 	struct in_ifaddr **ifap = NULL;
979 	struct in_ifaddr *ifa = NULL;
980 	struct net_device *dev;
981 	char *colon;
982 	int ret = -EFAULT;
983 	int tryaddrmatch = 0;
984 
985 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
986 
987 	/* save original address for comparison */
988 	memcpy(&sin_orig, sin, sizeof(*sin));
989 
990 	colon = strchr(ifr->ifr_name, ':');
991 	if (colon)
992 		*colon = 0;
993 
994 	dev_load(net, ifr->ifr_name);
995 
996 	switch (cmd) {
997 	case SIOCGIFADDR:	/* Get interface address */
998 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
999 	case SIOCGIFDSTADDR:	/* Get the destination address */
1000 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1001 		/* Note that these ioctls will not sleep,
1002 		   so that we do not impose a lock.
1003 		   One day we will be forced to put shlock here (I mean SMP)
1004 		 */
1005 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1006 		memset(sin, 0, sizeof(*sin));
1007 		sin->sin_family = AF_INET;
1008 		break;
1009 
1010 	case SIOCSIFFLAGS:
1011 		ret = -EPERM;
1012 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1013 			goto out;
1014 		break;
1015 	case SIOCSIFADDR:	/* Set interface address (and family) */
1016 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1017 	case SIOCSIFDSTADDR:	/* Set the destination address */
1018 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1019 		ret = -EPERM;
1020 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1021 			goto out;
1022 		ret = -EINVAL;
1023 		if (sin->sin_family != AF_INET)
1024 			goto out;
1025 		break;
1026 	default:
1027 		ret = -EINVAL;
1028 		goto out;
1029 	}
1030 
1031 	rtnl_lock();
1032 
1033 	ret = -ENODEV;
1034 	dev = __dev_get_by_name(net, ifr->ifr_name);
1035 	if (!dev)
1036 		goto done;
1037 
1038 	if (colon)
1039 		*colon = ':';
1040 
1041 	in_dev = __in_dev_get_rtnl(dev);
1042 	if (in_dev) {
1043 		if (tryaddrmatch) {
1044 			/* Matthias Andree */
1045 			/* compare label and address (4.4BSD style) */
1046 			/* note: we only do this for a limited set of ioctls
1047 			   and only if the original address family was AF_INET.
1048 			   This is checked above. */
1049 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1050 			     ifap = &ifa->ifa_next) {
1051 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1052 				    sin_orig.sin_addr.s_addr ==
1053 							ifa->ifa_local) {
1054 					break; /* found */
1055 				}
1056 			}
1057 		}
1058 		/* we didn't get a match, maybe the application is
1059 		   4.3BSD-style and passed in junk so we fall back to
1060 		   comparing just the label */
1061 		if (!ifa) {
1062 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1063 			     ifap = &ifa->ifa_next)
1064 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1065 					break;
1066 		}
1067 	}
1068 
1069 	ret = -EADDRNOTAVAIL;
1070 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1071 		goto done;
1072 
1073 	switch (cmd) {
1074 	case SIOCGIFADDR:	/* Get interface address */
1075 		ret = 0;
1076 		sin->sin_addr.s_addr = ifa->ifa_local;
1077 		break;
1078 
1079 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1080 		ret = 0;
1081 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1082 		break;
1083 
1084 	case SIOCGIFDSTADDR:	/* Get the destination address */
1085 		ret = 0;
1086 		sin->sin_addr.s_addr = ifa->ifa_address;
1087 		break;
1088 
1089 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1090 		ret = 0;
1091 		sin->sin_addr.s_addr = ifa->ifa_mask;
1092 		break;
1093 
1094 	case SIOCSIFFLAGS:
1095 		if (colon) {
1096 			ret = -EADDRNOTAVAIL;
1097 			if (!ifa)
1098 				break;
1099 			ret = 0;
1100 			if (!(ifr->ifr_flags & IFF_UP))
1101 				inet_del_ifa(in_dev, ifap, 1);
1102 			break;
1103 		}
1104 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1105 		break;
1106 
1107 	case SIOCSIFADDR:	/* Set interface address (and family) */
1108 		ret = -EINVAL;
1109 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110 			break;
1111 
1112 		if (!ifa) {
1113 			ret = -ENOBUFS;
1114 			ifa = inet_alloc_ifa();
1115 			if (!ifa)
1116 				break;
1117 			INIT_HLIST_NODE(&ifa->hash);
1118 			if (colon)
1119 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1120 			else
1121 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1122 		} else {
1123 			ret = 0;
1124 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1125 				break;
1126 			inet_del_ifa(in_dev, ifap, 0);
1127 			ifa->ifa_broadcast = 0;
1128 			ifa->ifa_scope = 0;
1129 		}
1130 
1131 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1132 
1133 		if (!(dev->flags & IFF_POINTOPOINT)) {
1134 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1135 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1136 			if ((dev->flags & IFF_BROADCAST) &&
1137 			    ifa->ifa_prefixlen < 31)
1138 				ifa->ifa_broadcast = ifa->ifa_address |
1139 						     ~ifa->ifa_mask;
1140 		} else {
1141 			ifa->ifa_prefixlen = 32;
1142 			ifa->ifa_mask = inet_make_mask(32);
1143 		}
1144 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1145 		ret = inet_set_ifa(dev, ifa);
1146 		break;
1147 
1148 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1149 		ret = 0;
1150 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1151 			inet_del_ifa(in_dev, ifap, 0);
1152 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1153 			inet_insert_ifa(ifa);
1154 		}
1155 		break;
1156 
1157 	case SIOCSIFDSTADDR:	/* Set the destination address */
1158 		ret = 0;
1159 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1160 			break;
1161 		ret = -EINVAL;
1162 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1163 			break;
1164 		ret = 0;
1165 		inet_del_ifa(in_dev, ifap, 0);
1166 		ifa->ifa_address = sin->sin_addr.s_addr;
1167 		inet_insert_ifa(ifa);
1168 		break;
1169 
1170 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1171 
1172 		/*
1173 		 *	The mask we set must be legal.
1174 		 */
1175 		ret = -EINVAL;
1176 		if (bad_mask(sin->sin_addr.s_addr, 0))
1177 			break;
1178 		ret = 0;
1179 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1180 			__be32 old_mask = ifa->ifa_mask;
1181 			inet_del_ifa(in_dev, ifap, 0);
1182 			ifa->ifa_mask = sin->sin_addr.s_addr;
1183 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1184 
1185 			/* See if current broadcast address matches
1186 			 * with current netmask, then recalculate
1187 			 * the broadcast address. Otherwise it's a
1188 			 * funny address, so don't touch it since
1189 			 * the user seems to know what (s)he's doing...
1190 			 */
1191 			if ((dev->flags & IFF_BROADCAST) &&
1192 			    (ifa->ifa_prefixlen < 31) &&
1193 			    (ifa->ifa_broadcast ==
1194 			     (ifa->ifa_local|~old_mask))) {
1195 				ifa->ifa_broadcast = (ifa->ifa_local |
1196 						      ~sin->sin_addr.s_addr);
1197 			}
1198 			inet_insert_ifa(ifa);
1199 		}
1200 		break;
1201 	}
1202 done:
1203 	rtnl_unlock();
1204 out:
1205 	return ret;
1206 }
1207 
1208 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1209 {
1210 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1211 	struct in_ifaddr *ifa;
1212 	struct ifreq ifr;
1213 	int done = 0;
1214 
1215 	if (WARN_ON(size > sizeof(struct ifreq)))
1216 		goto out;
1217 
1218 	if (!in_dev)
1219 		goto out;
1220 
1221 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1222 		if (!buf) {
1223 			done += size;
1224 			continue;
1225 		}
1226 		if (len < size)
1227 			break;
1228 		memset(&ifr, 0, sizeof(struct ifreq));
1229 		strcpy(ifr.ifr_name, ifa->ifa_label);
1230 
1231 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1232 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1233 								ifa->ifa_local;
1234 
1235 		if (copy_to_user(buf + done, &ifr, size)) {
1236 			done = -EFAULT;
1237 			break;
1238 		}
1239 		len  -= size;
1240 		done += size;
1241 	}
1242 out:
1243 	return done;
1244 }
1245 
1246 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1247 				 int scope)
1248 {
1249 	for_primary_ifa(in_dev) {
1250 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1251 		    ifa->ifa_scope <= scope)
1252 			return ifa->ifa_local;
1253 	} endfor_ifa(in_dev);
1254 
1255 	return 0;
1256 }
1257 
1258 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1259 {
1260 	__be32 addr = 0;
1261 	struct in_device *in_dev;
1262 	struct net *net = dev_net(dev);
1263 	int master_idx;
1264 
1265 	rcu_read_lock();
1266 	in_dev = __in_dev_get_rcu(dev);
1267 	if (!in_dev)
1268 		goto no_in_dev;
1269 
1270 	for_primary_ifa(in_dev) {
1271 		if (ifa->ifa_scope > scope)
1272 			continue;
1273 		if (!dst || inet_ifa_match(dst, ifa)) {
1274 			addr = ifa->ifa_local;
1275 			break;
1276 		}
1277 		if (!addr)
1278 			addr = ifa->ifa_local;
1279 	} endfor_ifa(in_dev);
1280 
1281 	if (addr)
1282 		goto out_unlock;
1283 no_in_dev:
1284 	master_idx = l3mdev_master_ifindex_rcu(dev);
1285 
1286 	/* For VRFs, the VRF device takes the place of the loopback device,
1287 	 * with addresses on it being preferred.  Note in such cases the
1288 	 * loopback device will be among the devices that fail the master_idx
1289 	 * equality check in the loop below.
1290 	 */
1291 	if (master_idx &&
1292 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1293 	    (in_dev = __in_dev_get_rcu(dev))) {
1294 		addr = in_dev_select_addr(in_dev, scope);
1295 		if (addr)
1296 			goto out_unlock;
1297 	}
1298 
1299 	/* Not loopback addresses on loopback should be preferred
1300 	   in this case. It is important that lo is the first interface
1301 	   in dev_base list.
1302 	 */
1303 	for_each_netdev_rcu(net, dev) {
1304 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1305 			continue;
1306 
1307 		in_dev = __in_dev_get_rcu(dev);
1308 		if (!in_dev)
1309 			continue;
1310 
1311 		addr = in_dev_select_addr(in_dev, scope);
1312 		if (addr)
1313 			goto out_unlock;
1314 	}
1315 out_unlock:
1316 	rcu_read_unlock();
1317 	return addr;
1318 }
1319 EXPORT_SYMBOL(inet_select_addr);
1320 
/*
 * Scan the addresses of @in_dev for one that satisfies the wildcard
 * constraints @dst, @local and @scope (see inet_confirm_addr()).
 *
 * Two pieces of state are tracked while walking the list:
 *  - addr: the first ifa_local that equals @local (any entry when @local
 *    is 0) and whose ifa_scope is <= @scope;
 *  - same: set once some entry passes the inet_ifa_match() tests against
 *    @local and @dst; a zero @local or @dst skips the respective test.
 *
 * Returns addr when "same" ended up set, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the candidate address only once. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* A matching entry was already seen earlier: done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit local address, or no dst constraint:
				 * the candidate already chosen is good enough.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1357 
1358 /*
1359  * Confirm that local IP address exists using wildcards:
1360  * - net: netns to check, cannot be NULL
1361  * - in_dev: only on this interface, NULL=any interface
1362  * - dst: only in the same subnet as dst, 0=any dst
1363  * - local: address, 0=autoselect the local address
1364  * - scope: maximum allowed scope value for the local address
1365  */
1366 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1367 			 __be32 dst, __be32 local, int scope)
1368 {
1369 	__be32 addr = 0;
1370 	struct net_device *dev;
1371 
1372 	if (in_dev)
1373 		return confirm_addr_indev(in_dev, dst, local, scope);
1374 
1375 	rcu_read_lock();
1376 	for_each_netdev_rcu(net, dev) {
1377 		in_dev = __in_dev_get_rcu(dev);
1378 		if (in_dev) {
1379 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1380 			if (addr)
1381 				break;
1382 		}
1383 	}
1384 	rcu_read_unlock();
1385 
1386 	return addr;
1387 }
1388 EXPORT_SYMBOL(inet_confirm_addr);
1389 
1390 /*
1391  *	Device notifier
1392  */
1393 
1394 int register_inetaddr_notifier(struct notifier_block *nb)
1395 {
1396 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_notifier);
1399 
1400 int unregister_inetaddr_notifier(struct notifier_block *nb)
1401 {
1402 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1403 }
1404 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1405 
1406 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1407 {
1408 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1409 }
1410 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1411 
1412 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1413 {
1414 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1415 	    nb);
1416 }
1417 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1418 
1419 /* Rename ifa_labels for a device name change. Make some effort to preserve
1420  * existing alias numbering and to create unique labels if possible.
1421 */
1422 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1423 {
1424 	struct in_ifaddr *ifa;
1425 	int named = 0;
1426 
1427 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1428 		char old[IFNAMSIZ], *dot;
1429 
1430 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1431 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1432 		if (named++ == 0)
1433 			goto skip;
1434 		dot = strchr(old, ':');
1435 		if (!dot) {
1436 			sprintf(old, ":%d", named);
1437 			dot = old;
1438 		}
1439 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1440 			strcat(ifa->ifa_label, dot);
1441 		else
1442 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1443 skip:
1444 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1445 	}
1446 }
1447 
1448 static bool inetdev_valid_mtu(unsigned int mtu)
1449 {
1450 	return mtu >= IPV4_MIN_MTU;
1451 }
1452 
1453 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1454 					struct in_device *in_dev)
1455 
1456 {
1457 	struct in_ifaddr *ifa;
1458 
1459 	for (ifa = in_dev->ifa_list; ifa;
1460 	     ifa = ifa->ifa_next) {
1461 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1462 			 ifa->ifa_local, dev,
1463 			 ifa->ifa_local, NULL,
1464 			 dev->dev_addr, NULL);
1465 	}
1466 }
1467 
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback of the IPv4 device layer.
 *
 * Creates, updates or destroys the in_device attached to the device that
 * @ptr refers to, depending on @event. Returns NOTIFY_DONE in all cases
 * except a failing inetdev_init() on NETDEV_REGISTER, where the error is
 * passed back through notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No in_device yet: only REGISTER and CHANGEMTU may create one. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			/* Loopback devices get NOXFRM and NOPOLICY set. */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* An in_device already exists at registration time: log it
		 * and clear dev->ip_ptr.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* A loopback device coming up gets INADDR_LOOPBACK/8 with
		 * host scope and infinite lifetimes installed.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce when arp_notify is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries for this in_device. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1563 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1567 
1568 static size_t inet_nlmsg_size(void)
1569 {
1570 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1571 	       + nla_total_size(4) /* IFA_ADDRESS */
1572 	       + nla_total_size(4) /* IFA_LOCAL */
1573 	       + nla_total_size(4) /* IFA_BROADCAST */
1574 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1575 	       + nla_total_size(4)  /* IFA_FLAGS */
1576 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1577 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1578 }
1579 
1580 static inline u32 cstamp_delta(unsigned long cstamp)
1581 {
1582 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1583 }
1584 
1585 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1586 			 unsigned long tstamp, u32 preferred, u32 valid)
1587 {
1588 	struct ifa_cacheinfo ci;
1589 
1590 	ci.cstamp = cstamp_delta(cstamp);
1591 	ci.tstamp = cstamp_delta(tstamp);
1592 	ci.ifa_prefered = preferred;
1593 	ci.ifa_valid = valid;
1594 
1595 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1596 }
1597 
1598 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1599 			    struct inet_fill_args *args)
1600 {
1601 	struct ifaddrmsg *ifm;
1602 	struct nlmsghdr  *nlh;
1603 	u32 preferred, valid;
1604 
1605 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1606 			args->flags);
1607 	if (!nlh)
1608 		return -EMSGSIZE;
1609 
1610 	ifm = nlmsg_data(nlh);
1611 	ifm->ifa_family = AF_INET;
1612 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1613 	ifm->ifa_flags = ifa->ifa_flags;
1614 	ifm->ifa_scope = ifa->ifa_scope;
1615 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1616 
1617 	if (args->netnsid >= 0 &&
1618 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1619 		goto nla_put_failure;
1620 
1621 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1622 		preferred = ifa->ifa_preferred_lft;
1623 		valid = ifa->ifa_valid_lft;
1624 		if (preferred != INFINITY_LIFE_TIME) {
1625 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1626 
1627 			if (preferred > tval)
1628 				preferred -= tval;
1629 			else
1630 				preferred = 0;
1631 			if (valid != INFINITY_LIFE_TIME) {
1632 				if (valid > tval)
1633 					valid -= tval;
1634 				else
1635 					valid = 0;
1636 			}
1637 		}
1638 	} else {
1639 		preferred = INFINITY_LIFE_TIME;
1640 		valid = INFINITY_LIFE_TIME;
1641 	}
1642 	if ((ifa->ifa_address &&
1643 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1644 	    (ifa->ifa_local &&
1645 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1646 	    (ifa->ifa_broadcast &&
1647 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1648 	    (ifa->ifa_label[0] &&
1649 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1650 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1651 	    (ifa->ifa_rt_priority &&
1652 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1653 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1654 			  preferred, valid))
1655 		goto nla_put_failure;
1656 
1657 	nlmsg_end(skb, nlh);
1658 	return 0;
1659 
1660 nla_put_failure:
1661 	nlmsg_cancel(skb, nlh);
1662 	return -EMSGSIZE;
1663 }
1664 
1665 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1666 				      struct inet_fill_args *fillargs,
1667 				      struct net **tgt_net, struct sock *sk,
1668 				      struct netlink_callback *cb)
1669 {
1670 	struct netlink_ext_ack *extack = cb->extack;
1671 	struct nlattr *tb[IFA_MAX+1];
1672 	struct ifaddrmsg *ifm;
1673 	int err, i;
1674 
1675 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1676 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1677 		return -EINVAL;
1678 	}
1679 
1680 	ifm = nlmsg_data(nlh);
1681 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1682 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1683 		return -EINVAL;
1684 	}
1685 
1686 	fillargs->ifindex = ifm->ifa_index;
1687 	if (fillargs->ifindex) {
1688 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1689 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1690 	}
1691 
1692 	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1693 				 ifa_ipv4_policy, extack);
1694 	if (err < 0)
1695 		return err;
1696 
1697 	for (i = 0; i <= IFA_MAX; ++i) {
1698 		if (!tb[i])
1699 			continue;
1700 
1701 		if (i == IFA_TARGET_NETNSID) {
1702 			struct net *net;
1703 
1704 			fillargs->netnsid = nla_get_s32(tb[i]);
1705 
1706 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1707 			if (IS_ERR(net)) {
1708 				fillargs->netnsid = -1;
1709 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1710 				return PTR_ERR(net);
1711 			}
1712 			*tgt_net = net;
1713 		} else {
1714 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1715 			return -EINVAL;
1716 		}
1717 	}
1718 
1719 	return 0;
1720 }
1721 
1722 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1723 			    struct netlink_callback *cb, int s_ip_idx,
1724 			    struct inet_fill_args *fillargs)
1725 {
1726 	struct in_ifaddr *ifa;
1727 	int ip_idx = 0;
1728 	int err;
1729 
1730 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1731 		if (ip_idx < s_ip_idx)
1732 			continue;
1733 
1734 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1735 		if (err < 0)
1736 			goto done;
1737 
1738 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1739 	}
1740 	err = 0;
1741 
1742 done:
1743 	cb->args[2] = ip_idx;
1744 
1745 	return err;
1746 }
1747 
/*
 * Netlink dump callback that emits one RTM_NEWADDR message (flagged
 * NLM_F_MULTI) per IPv4 address.
 *
 * Resume state lives in cb->args[]: [0] is the device hash bucket, [1] the
 * device index within that bucket and [2] the address index within the
 * device (written by in_dev_dump_addr()).
 *
 * With cb->strict_check the request is validated first and may select a
 * target namespace and/or a single device by ifindex.
 *
 * Returns skb->len when it is non-zero, otherwise err.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Single-device dump: handle it here and skip the hash walk
		 * (cb->args[0] and cb->args[1] are left untouched).
		 */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* s_idx only applies to the first bucket visited; reset afterwards. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the device we resumed on: start each further
			 * device from its first address.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Remember the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/* netnsid >= 0 means tgt_net was obtained from the request. */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1831 
1832 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1833 		      u32 portid)
1834 {
1835 	struct inet_fill_args fillargs = {
1836 		.portid = portid,
1837 		.seq = nlh ? nlh->nlmsg_seq : 0,
1838 		.event = event,
1839 		.flags = 0,
1840 		.netnsid = -1,
1841 	};
1842 	struct sk_buff *skb;
1843 	int err = -ENOBUFS;
1844 	struct net *net;
1845 
1846 	net = dev_net(ifa->ifa_dev->dev);
1847 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1848 	if (!skb)
1849 		goto errout;
1850 
1851 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1852 	if (err < 0) {
1853 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1854 		WARN_ON(err == -EMSGSIZE);
1855 		kfree_skb(skb);
1856 		goto errout;
1857 	}
1858 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1859 	return;
1860 errout:
1861 	if (err < 0)
1862 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1863 }
1864 
1865 static size_t inet_get_link_af_size(const struct net_device *dev,
1866 				    u32 ext_filter_mask)
1867 {
1868 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1869 
1870 	if (!in_dev)
1871 		return 0;
1872 
1873 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1874 }
1875 
1876 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1877 			     u32 ext_filter_mask)
1878 {
1879 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1880 	struct nlattr *nla;
1881 	int i;
1882 
1883 	if (!in_dev)
1884 		return -ENODATA;
1885 
1886 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1887 	if (!nla)
1888 		return -EMSGSIZE;
1889 
1890 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1891 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1892 
1893 	return 0;
1894 }
1895 
/* Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must be
 * a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1899 
1900 static int inet_validate_link_af(const struct net_device *dev,
1901 				 const struct nlattr *nla)
1902 {
1903 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1904 	int err, rem;
1905 
1906 	if (dev && !__in_dev_get_rcu(dev))
1907 		return -EAFNOSUPPORT;
1908 
1909 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1910 	if (err < 0)
1911 		return err;
1912 
1913 	if (tb[IFLA_INET_CONF]) {
1914 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1915 			int cfgid = nla_type(a);
1916 
1917 			if (nla_len(a) < 4)
1918 				return -EINVAL;
1919 
1920 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1921 				return -EINVAL;
1922 		}
1923 	}
1924 
1925 	return 0;
1926 }
1927 
1928 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1929 {
1930 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1931 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1932 	int rem;
1933 
1934 	if (!in_dev)
1935 		return -EAFNOSUPPORT;
1936 
1937 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1938 		BUG();
1939 
1940 	if (tb[IFLA_INET_CONF]) {
1941 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1942 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1943 	}
1944 
1945 	return 0;
1946 }
1947 
1948 static int inet_netconf_msgsize_devconf(int type)
1949 {
1950 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1951 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1952 	bool all = false;
1953 
1954 	if (type == NETCONFA_ALL)
1955 		all = true;
1956 
1957 	if (all || type == NETCONFA_FORWARDING)
1958 		size += nla_total_size(4);
1959 	if (all || type == NETCONFA_RP_FILTER)
1960 		size += nla_total_size(4);
1961 	if (all || type == NETCONFA_MC_FORWARDING)
1962 		size += nla_total_size(4);
1963 	if (all || type == NETCONFA_BC_FORWARDING)
1964 		size += nla_total_size(4);
1965 	if (all || type == NETCONFA_PROXY_NEIGH)
1966 		size += nla_total_size(4);
1967 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1968 		size += nla_total_size(4);
1969 
1970 	return size;
1971 }
1972 
1973 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1974 				     struct ipv4_devconf *devconf, u32 portid,
1975 				     u32 seq, int event, unsigned int flags,
1976 				     int type)
1977 {
1978 	struct nlmsghdr  *nlh;
1979 	struct netconfmsg *ncm;
1980 	bool all = false;
1981 
1982 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1983 			flags);
1984 	if (!nlh)
1985 		return -EMSGSIZE;
1986 
1987 	if (type == NETCONFA_ALL)
1988 		all = true;
1989 
1990 	ncm = nlmsg_data(nlh);
1991 	ncm->ncm_family = AF_INET;
1992 
1993 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1994 		goto nla_put_failure;
1995 
1996 	if (!devconf)
1997 		goto out;
1998 
1999 	if ((all || type == NETCONFA_FORWARDING) &&
2000 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2001 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2002 		goto nla_put_failure;
2003 	if ((all || type == NETCONFA_RP_FILTER) &&
2004 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2005 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2006 		goto nla_put_failure;
2007 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2008 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2009 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2010 		goto nla_put_failure;
2011 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2012 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2013 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2014 		goto nla_put_failure;
2015 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2016 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2017 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2018 		goto nla_put_failure;
2019 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2020 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2021 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2022 		goto nla_put_failure;
2023 
2024 out:
2025 	nlmsg_end(skb, nlh);
2026 	return 0;
2027 
2028 nla_put_failure:
2029 	nlmsg_cancel(skb, nlh);
2030 	return -EMSGSIZE;
2031 }
2032 
2033 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2034 				 int ifindex, struct ipv4_devconf *devconf)
2035 {
2036 	struct sk_buff *skb;
2037 	int err = -ENOBUFS;
2038 
2039 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2040 	if (!skb)
2041 		goto errout;
2042 
2043 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2044 					event, 0, type);
2045 	if (err < 0) {
2046 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2047 		WARN_ON(err == -EMSGSIZE);
2048 		kfree_skb(skb);
2049 		goto errout;
2050 	}
2051 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2052 	return;
2053 errout:
2054 	if (err < 0)
2055 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2056 }
2057 
/* Attribute policy used by inet_netconf_get_devconf() when parsing a
 * request; each listed attribute is given a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2065 
2066 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2067 				    struct nlmsghdr *nlh,
2068 				    struct netlink_ext_ack *extack)
2069 {
2070 	struct net *net = sock_net(in_skb->sk);
2071 	struct nlattr *tb[NETCONFA_MAX+1];
2072 	struct netconfmsg *ncm;
2073 	struct sk_buff *skb;
2074 	struct ipv4_devconf *devconf;
2075 	struct in_device *in_dev;
2076 	struct net_device *dev;
2077 	int ifindex;
2078 	int err;
2079 
2080 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2081 			  devconf_ipv4_policy, extack);
2082 	if (err < 0)
2083 		goto errout;
2084 
2085 	err = -EINVAL;
2086 	if (!tb[NETCONFA_IFINDEX])
2087 		goto errout;
2088 
2089 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2090 	switch (ifindex) {
2091 	case NETCONFA_IFINDEX_ALL:
2092 		devconf = net->ipv4.devconf_all;
2093 		break;
2094 	case NETCONFA_IFINDEX_DEFAULT:
2095 		devconf = net->ipv4.devconf_dflt;
2096 		break;
2097 	default:
2098 		dev = __dev_get_by_index(net, ifindex);
2099 		if (!dev)
2100 			goto errout;
2101 		in_dev = __in_dev_get_rtnl(dev);
2102 		if (!in_dev)
2103 			goto errout;
2104 		devconf = &in_dev->cnf;
2105 		break;
2106 	}
2107 
2108 	err = -ENOBUFS;
2109 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2110 	if (!skb)
2111 		goto errout;
2112 
2113 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2114 					NETLINK_CB(in_skb).portid,
2115 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2116 					NETCONFA_ALL);
2117 	if (err < 0) {
2118 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2119 		WARN_ON(err == -EMSGSIZE);
2120 		kfree_skb(skb);
2121 		goto errout;
2122 	}
2123 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2124 errout:
2125 	return err;
2126 }
2127 
/*
 * Netlink dump callback that emits one RTM_NEWNETCONF message (flagged
 * NLM_F_MULTI, all attributes) per device with an in_device, followed by
 * one entry for NETCONFA_IFINDEX_ALL and one for NETCONFA_IFINDEX_DEFAULT.
 *
 * Resume state lives in cb->args[]: [0] is the hash bucket (values of
 * NETDEV_HASHENTRIES and above mark progress through the two trailing
 * entries) and [1] the device index within the bucket.
 *
 * Returns skb->len, or -EINVAL when a strict-mode request has a short
 * header or carries data after the header.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/* s_idx only applies to the first bucket visited; reset afterwards. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All buckets done: emit the "all" entry, advancing h on success. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Then the "default" entry, advancing h once more on success. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	/* Remember the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2214 
2215 #ifdef CONFIG_SYSCTL
2216 
2217 static void devinet_copy_dflt_conf(struct net *net, int i)
2218 {
2219 	struct net_device *dev;
2220 
2221 	rcu_read_lock();
2222 	for_each_netdev_rcu(net, dev) {
2223 		struct in_device *in_dev;
2224 
2225 		in_dev = __in_dev_get_rcu(dev);
2226 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2227 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2228 	}
2229 	rcu_read_unlock();
2230 }
2231 
2232 /* called with RTNL locked */
2233 static void inet_forward_change(struct net *net)
2234 {
2235 	struct net_device *dev;
2236 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2237 
2238 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2239 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2240 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2241 				    NETCONFA_FORWARDING,
2242 				    NETCONFA_IFINDEX_ALL,
2243 				    net->ipv4.devconf_all);
2244 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2245 				    NETCONFA_FORWARDING,
2246 				    NETCONFA_IFINDEX_DEFAULT,
2247 				    net->ipv4.devconf_dflt);
2248 
2249 	for_each_netdev(net, dev) {
2250 		struct in_device *in_dev;
2251 
2252 		if (on)
2253 			dev_disable_lro(dev);
2254 
2255 		in_dev = __in_dev_get_rtnl(dev);
2256 		if (in_dev) {
2257 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2258 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2259 						    NETCONFA_FORWARDING,
2260 						    dev->ifindex, &in_dev->cnf);
2261 		}
2262 	}
2263 }
2264 
2265 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2266 {
2267 	if (cnf == net->ipv4.devconf_dflt)
2268 		return NETCONFA_IFINDEX_DEFAULT;
2269 	else if (cnf == net->ipv4.devconf_all)
2270 		return NETCONFA_IFINDEX_ALL;
2271 	else {
2272 		struct in_device *idev
2273 			= container_of(cnf, struct in_device, cnf);
2274 		return idev->dev->ifindex;
2275 	}
2276 }
2277 
2278 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2279 			     void __user *buffer,
2280 			     size_t *lenp, loff_t *ppos)
2281 {
2282 	int old_value = *(int *)ctl->data;
2283 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2284 	int new_value = *(int *)ctl->data;
2285 
2286 	if (write) {
2287 		struct ipv4_devconf *cnf = ctl->extra1;
2288 		struct net *net = ctl->extra2;
2289 		int i = (int *)ctl->data - cnf->data;
2290 		int ifindex;
2291 
2292 		set_bit(i, cnf->state);
2293 
2294 		if (cnf == net->ipv4.devconf_dflt)
2295 			devinet_copy_dflt_conf(net, i);
2296 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2297 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2298 			if ((new_value == 0) && (old_value != 0))
2299 				rt_cache_flush(net);
2300 
2301 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2302 		    new_value != old_value)
2303 			rt_cache_flush(net);
2304 
2305 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2306 		    new_value != old_value) {
2307 			ifindex = devinet_conf_ifindex(net, cnf);
2308 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2309 						    NETCONFA_RP_FILTER,
2310 						    ifindex, cnf);
2311 		}
2312 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2313 		    new_value != old_value) {
2314 			ifindex = devinet_conf_ifindex(net, cnf);
2315 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2316 						    NETCONFA_PROXY_NEIGH,
2317 						    ifindex, cnf);
2318 		}
2319 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2320 		    new_value != old_value) {
2321 			ifindex = devinet_conf_ifindex(net, cnf);
2322 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2323 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2324 						    ifindex, cnf);
2325 		}
2326 	}
2327 
2328 	return ret;
2329 }
2330 
2331 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2332 				  void __user *buffer,
2333 				  size_t *lenp, loff_t *ppos)
2334 {
2335 	int *valp = ctl->data;
2336 	int val = *valp;
2337 	loff_t pos = *ppos;
2338 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2339 
2340 	if (write && *valp != val) {
2341 		struct net *net = ctl->extra2;
2342 
2343 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2344 			if (!rtnl_trylock()) {
2345 				/* Restore the original values before restarting */
2346 				*valp = val;
2347 				*ppos = pos;
2348 				return restart_syscall();
2349 			}
2350 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2351 				inet_forward_change(net);
2352 			} else {
2353 				struct ipv4_devconf *cnf = ctl->extra1;
2354 				struct in_device *idev =
2355 					container_of(cnf, struct in_device, cnf);
2356 				if (*valp)
2357 					dev_disable_lro(idev->dev);
2358 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2359 							    NETCONFA_FORWARDING,
2360 							    idev->dev->ifindex,
2361 							    cnf);
2362 			}
2363 			rtnl_unlock();
2364 			rt_cache_flush(net);
2365 		} else
2366 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2367 						    NETCONFA_FORWARDING,
2368 						    NETCONFA_IFINDEX_DEFAULT,
2369 						    net->ipv4.devconf_dflt);
2370 	}
2371 
2372 	return ret;
2373 }
2374 
2375 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2376 				void __user *buffer,
2377 				size_t *lenp, loff_t *ppos)
2378 {
2379 	int *valp = ctl->data;
2380 	int val = *valp;
2381 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382 	struct net *net = ctl->extra2;
2383 
2384 	if (write && *valp != val)
2385 		rt_cache_flush(net);
2386 
2387 	return ret;
2388 }
2389 
/*
 * Template ctl_table entry for devconf attribute IPV4_DEVCONF_<attr>.
 * .data and .extra1 refer to the global ipv4_devconf here;
 * __devinet_sysctl_register() re-points them at the real target block
 * in its private copy of the table.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Writable (0644) entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Writable (0644) entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Writable (0644) entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2412 
2413 static struct devinet_sysctl_table {
2414 	struct ctl_table_header *sysctl_header;
2415 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2416 } devinet_sysctl = {
2417 	.devinet_vars = {
2418 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2419 					     devinet_sysctl_forward),
2420 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2421 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2422 
2423 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2424 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2425 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2426 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2427 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2428 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2429 					"accept_source_route"),
2430 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2431 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2432 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2433 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2434 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2435 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2436 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2437 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2438 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2439 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2440 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2441 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2442 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2443 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2444 					"force_igmp_version"),
2445 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2446 					"igmpv2_unsolicited_report_interval"),
2447 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2448 					"igmpv3_unsolicited_report_interval"),
2449 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2450 					"ignore_routes_with_linkdown"),
2451 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2452 					"drop_gratuitous_arp"),
2453 
2454 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2455 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2456 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2457 					      "promote_secondaries"),
2458 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2459 					      "route_localnet"),
2460 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2461 					      "drop_unicast_in_l2_multicast"),
2462 	},
2463 };
2464 
2465 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2466 				     int ifindex, struct ipv4_devconf *p)
2467 {
2468 	int i;
2469 	struct devinet_sysctl_table *t;
2470 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2471 
2472 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2473 	if (!t)
2474 		goto out;
2475 
2476 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2477 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2478 		t->devinet_vars[i].extra1 = p;
2479 		t->devinet_vars[i].extra2 = net;
2480 	}
2481 
2482 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2483 
2484 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2485 	if (!t->sysctl_header)
2486 		goto free;
2487 
2488 	p->sysctl = t;
2489 
2490 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2491 				    ifindex, p);
2492 	return 0;
2493 
2494 free:
2495 	kfree(t);
2496 out:
2497 	return -ENOBUFS;
2498 }
2499 
2500 static void __devinet_sysctl_unregister(struct net *net,
2501 					struct ipv4_devconf *cnf, int ifindex)
2502 {
2503 	struct devinet_sysctl_table *t = cnf->sysctl;
2504 
2505 	if (t) {
2506 		cnf->sysctl = NULL;
2507 		unregister_net_sysctl_table(t->sysctl_header);
2508 		kfree(t);
2509 	}
2510 
2511 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2512 }
2513 
2514 static int devinet_sysctl_register(struct in_device *idev)
2515 {
2516 	int err;
2517 
2518 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2519 		return -EINVAL;
2520 
2521 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2522 	if (err)
2523 		return err;
2524 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2525 					idev->dev->ifindex, &idev->cnf);
2526 	if (err)
2527 		neigh_sysctl_unregister(idev->arp_parms);
2528 	return err;
2529 }
2530 
2531 static void devinet_sysctl_unregister(struct in_device *idev)
2532 {
2533 	struct net *net = dev_net(idev->dev);
2534 
2535 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2536 	neigh_sysctl_unregister(idev->arp_parms);
2537 }
2538 
2539 static struct ctl_table ctl_forward_entry[] = {
2540 	{
2541 		.procname	= "ip_forward",
2542 		.data		= &ipv4_devconf.data[
2543 					IPV4_DEVCONF_FORWARDING - 1],
2544 		.maxlen		= sizeof(int),
2545 		.mode		= 0644,
2546 		.proc_handler	= devinet_sysctl_forward,
2547 		.extra1		= &ipv4_devconf,
2548 		.extra2		= &init_net,
2549 	},
2550 	{ },
2551 };
2552 #endif
2553 
2554 static __net_init int devinet_init_net(struct net *net)
2555 {
2556 	int err;
2557 	struct ipv4_devconf *all, *dflt;
2558 #ifdef CONFIG_SYSCTL
2559 	struct ctl_table *tbl = ctl_forward_entry;
2560 	struct ctl_table_header *forw_hdr;
2561 #endif
2562 
2563 	err = -ENOMEM;
2564 	all = &ipv4_devconf;
2565 	dflt = &ipv4_devconf_dflt;
2566 
2567 	if (!net_eq(net, &init_net)) {
2568 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2569 		if (!all)
2570 			goto err_alloc_all;
2571 
2572 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2573 		if (!dflt)
2574 			goto err_alloc_dflt;
2575 
2576 #ifdef CONFIG_SYSCTL
2577 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2578 		if (!tbl)
2579 			goto err_alloc_ctl;
2580 
2581 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2582 		tbl[0].extra1 = all;
2583 		tbl[0].extra2 = net;
2584 #endif
2585 	}
2586 
2587 #ifdef CONFIG_SYSCTL
2588 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2589 	if (err < 0)
2590 		goto err_reg_all;
2591 
2592 	err = __devinet_sysctl_register(net, "default",
2593 					NETCONFA_IFINDEX_DEFAULT, dflt);
2594 	if (err < 0)
2595 		goto err_reg_dflt;
2596 
2597 	err = -ENOMEM;
2598 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2599 	if (!forw_hdr)
2600 		goto err_reg_ctl;
2601 	net->ipv4.forw_hdr = forw_hdr;
2602 #endif
2603 
2604 	net->ipv4.devconf_all = all;
2605 	net->ipv4.devconf_dflt = dflt;
2606 	return 0;
2607 
2608 #ifdef CONFIG_SYSCTL
2609 err_reg_ctl:
2610 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2611 err_reg_dflt:
2612 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2613 err_reg_all:
2614 	if (tbl != ctl_forward_entry)
2615 		kfree(tbl);
2616 err_alloc_ctl:
2617 #endif
2618 	if (dflt != &ipv4_devconf_dflt)
2619 		kfree(dflt);
2620 err_alloc_dflt:
2621 	if (all != &ipv4_devconf)
2622 		kfree(all);
2623 err_alloc_all:
2624 	return err;
2625 }
2626 
2627 static __net_exit void devinet_exit_net(struct net *net)
2628 {
2629 #ifdef CONFIG_SYSCTL
2630 	struct ctl_table *tbl;
2631 
2632 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2633 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2634 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2635 				    NETCONFA_IFINDEX_DEFAULT);
2636 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2637 				    NETCONFA_IFINDEX_ALL);
2638 	kfree(tbl);
2639 #endif
2640 	kfree(net->ipv4.devconf_dflt);
2641 	kfree(net->ipv4.devconf_all);
2642 }
2643 
2644 static __net_initdata struct pernet_operations devinet_ops = {
2645 	.init = devinet_init_net,
2646 	.exit = devinet_exit_net,
2647 };
2648 
2649 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2650 	.family		  = AF_INET,
2651 	.fill_link_af	  = inet_fill_link_af,
2652 	.get_link_af_size = inet_get_link_af_size,
2653 	.validate_link_af = inet_validate_link_af,
2654 	.set_link_af	  = inet_set_link_af,
2655 };
2656 
2657 void __init devinet_init(void)
2658 {
2659 	int i;
2660 
2661 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2662 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2663 
2664 	register_pernet_subsys(&devinet_ops);
2665 
2666 	register_gifconf(PF_INET, inet_gifconf);
2667 	register_netdevice_notifier(&ip_netdev_notifier);
2668 
2669 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2670 
2671 	rtnl_af_register(&inet_af_ops);
2672 
2673 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2674 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2675 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2676 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2677 		      inet_netconf_dump_devconf, 0);
2678 }
2679