xref: /openbmc/linux/net/ipv4/devinet.c (revision 6d99a79c)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
/*
 * Read devconf attribute @attr from the per-namespace default template
 * (net->ipv4.devconf_dflt).  @net is parenthesised so that the macro
 * also expands correctly when it is passed an expression rather than a
 * plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
103 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
104 };
105 
106 struct inet_fill_args {
107 	u32 portid;
108 	u32 seq;
109 	int event;
110 	unsigned int flags;
111 	int netnsid;
112 	int ifindex;
113 };
114 
115 #define IN4_ADDR_HSIZE_SHIFT	8
116 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
117 
118 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119 
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122 	u32 val = (__force u32) addr ^ net_hash_mix(net);
123 
124 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126 
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
130 
131 	ASSERT_RTNL();
132 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134 
135 static void inet_hash_remove(struct in_ifaddr *ifa)
136 {
137 	ASSERT_RTNL();
138 	hlist_del_init_rcu(&ifa->hash);
139 }
140 
141 /**
142  * __ip_dev_find - find the first device with a given source address.
143  * @net: the net namespace
144  * @addr: the source address
145  * @devref: if true, take a reference on the found device
146  *
147  * If a caller uses devref=false, it should be protected by RCU, or RTNL
148  */
149 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
150 {
151 	struct net_device *result = NULL;
152 	struct in_ifaddr *ifa;
153 
154 	rcu_read_lock();
155 	ifa = inet_lookup_ifaddr_rcu(net, addr);
156 	if (!ifa) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fallback to FIB local table so that communication
162 		 * over loopback subnets work.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	} else {
170 		result = ifa->ifa_dev->dev;
171 	}
172 	if (result && devref)
173 		dev_hold(result);
174 	rcu_read_unlock();
175 	return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178 
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182 	u32 hash = inet_addr_hash(net, addr);
183 	struct in_ifaddr *ifa;
184 
185 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186 		if (ifa->ifa_local == addr &&
187 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
188 			return ifa;
189 
190 	return NULL;
191 }
192 
193 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
194 
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
197 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
198 			 int destroy);
199 #ifdef CONFIG_SYSCTL
200 static int devinet_sysctl_register(struct in_device *idev);
201 static void devinet_sysctl_unregister(struct in_device *idev);
202 #else
203 static int devinet_sysctl_register(struct in_device *idev)
204 {
205 	return 0;
206 }
207 static void devinet_sysctl_unregister(struct in_device *idev)
208 {
209 }
210 #endif
211 
212 /* Locks all the inet devices. */
213 
214 static struct in_ifaddr *inet_alloc_ifa(void)
215 {
216 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
217 }
218 
219 static void inet_rcu_free_ifa(struct rcu_head *head)
220 {
221 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
222 	if (ifa->ifa_dev)
223 		in_dev_put(ifa->ifa_dev);
224 	kfree(ifa);
225 }
226 
227 static void inet_free_ifa(struct in_ifaddr *ifa)
228 {
229 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
230 }
231 
232 void in_dev_finish_destroy(struct in_device *idev)
233 {
234 	struct net_device *dev = idev->dev;
235 
236 	WARN_ON(idev->ifa_list);
237 	WARN_ON(idev->mc_list);
238 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
239 #ifdef NET_REFCNT_DEBUG
240 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
241 #endif
242 	dev_put(dev);
243 	if (!idev->dead)
244 		pr_err("Freeing alive in_device %p\n", idev);
245 	else
246 		kfree(idev);
247 }
248 EXPORT_SYMBOL(in_dev_finish_destroy);
249 
250 static struct in_device *inetdev_init(struct net_device *dev)
251 {
252 	struct in_device *in_dev;
253 	int err = -ENOMEM;
254 
255 	ASSERT_RTNL();
256 
257 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
258 	if (!in_dev)
259 		goto out;
260 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
261 			sizeof(in_dev->cnf));
262 	in_dev->cnf.sysctl = NULL;
263 	in_dev->dev = dev;
264 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
265 	if (!in_dev->arp_parms)
266 		goto out_kfree;
267 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
268 		dev_disable_lro(dev);
269 	/* Reference in_dev->dev */
270 	dev_hold(dev);
271 	/* Account for reference dev->ip_ptr (below) */
272 	refcount_set(&in_dev->refcnt, 1);
273 
274 	err = devinet_sysctl_register(in_dev);
275 	if (err) {
276 		in_dev->dead = 1;
277 		in_dev_put(in_dev);
278 		in_dev = NULL;
279 		goto out;
280 	}
281 	ip_mc_init_dev(in_dev);
282 	if (dev->flags & IFF_UP)
283 		ip_mc_up(in_dev);
284 
285 	/* we can receive as soon as ip_ptr is set -- do this last */
286 	rcu_assign_pointer(dev->ip_ptr, in_dev);
287 out:
288 	return in_dev ?: ERR_PTR(err);
289 out_kfree:
290 	kfree(in_dev);
291 	in_dev = NULL;
292 	goto out;
293 }
294 
295 static void in_dev_rcu_put(struct rcu_head *head)
296 {
297 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
298 	in_dev_put(idev);
299 }
300 
301 static void inetdev_destroy(struct in_device *in_dev)
302 {
303 	struct in_ifaddr *ifa;
304 	struct net_device *dev;
305 
306 	ASSERT_RTNL();
307 
308 	dev = in_dev->dev;
309 
310 	in_dev->dead = 1;
311 
312 	ip_mc_destroy_dev(in_dev);
313 
314 	while ((ifa = in_dev->ifa_list) != NULL) {
315 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
316 		inet_free_ifa(ifa);
317 	}
318 
319 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
320 
321 	devinet_sysctl_unregister(in_dev);
322 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
323 	arp_ifdown(dev);
324 
325 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
326 }
327 
328 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
329 {
330 	rcu_read_lock();
331 	for_primary_ifa(in_dev) {
332 		if (inet_ifa_match(a, ifa)) {
333 			if (!b || inet_ifa_match(b, ifa)) {
334 				rcu_read_unlock();
335 				return 1;
336 			}
337 		}
338 	} endfor_ifa(in_dev);
339 	rcu_read_unlock();
340 	return 0;
341 }
342 
343 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
344 			 int destroy, struct nlmsghdr *nlh, u32 portid)
345 {
346 	struct in_ifaddr *promote = NULL;
347 	struct in_ifaddr *ifa, *ifa1 = *ifap;
348 	struct in_ifaddr *last_prim = in_dev->ifa_list;
349 	struct in_ifaddr *prev_prom = NULL;
350 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
351 
352 	ASSERT_RTNL();
353 
354 	if (in_dev->dead)
355 		goto no_promotions;
356 
357 	/* 1. Deleting primary ifaddr forces deletion all secondaries
358 	 * unless alias promotion is set
359 	 **/
360 
361 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
362 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
363 
364 		while ((ifa = *ifap1) != NULL) {
365 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
366 			    ifa1->ifa_scope <= ifa->ifa_scope)
367 				last_prim = ifa;
368 
369 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
370 			    ifa1->ifa_mask != ifa->ifa_mask ||
371 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
372 				ifap1 = &ifa->ifa_next;
373 				prev_prom = ifa;
374 				continue;
375 			}
376 
377 			if (!do_promote) {
378 				inet_hash_remove(ifa);
379 				*ifap1 = ifa->ifa_next;
380 
381 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
382 				blocking_notifier_call_chain(&inetaddr_chain,
383 						NETDEV_DOWN, ifa);
384 				inet_free_ifa(ifa);
385 			} else {
386 				promote = ifa;
387 				break;
388 			}
389 		}
390 	}
391 
392 	/* On promotion all secondaries from subnet are changing
393 	 * the primary IP, we must remove all their routes silently
394 	 * and later to add them back with new prefsrc. Do this
395 	 * while all addresses are on the device list.
396 	 */
397 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
398 		if (ifa1->ifa_mask == ifa->ifa_mask &&
399 		    inet_ifa_match(ifa1->ifa_address, ifa))
400 			fib_del_ifaddr(ifa, ifa1);
401 	}
402 
403 no_promotions:
404 	/* 2. Unlink it */
405 
406 	*ifap = ifa1->ifa_next;
407 	inet_hash_remove(ifa1);
408 
409 	/* 3. Announce address deletion */
410 
411 	/* Send message first, then call notifier.
412 	   At first sight, FIB update triggered by notifier
413 	   will refer to already deleted ifaddr, that could confuse
414 	   netlink listeners. It is not true: look, gated sees
415 	   that route deleted and if it still thinks that ifaddr
416 	   is valid, it will try to restore deleted routes... Grr.
417 	   So that, this order is correct.
418 	 */
419 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
420 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
421 
422 	if (promote) {
423 		struct in_ifaddr *next_sec = promote->ifa_next;
424 
425 		if (prev_prom) {
426 			prev_prom->ifa_next = promote->ifa_next;
427 			promote->ifa_next = last_prim->ifa_next;
428 			last_prim->ifa_next = promote;
429 		}
430 
431 		promote->ifa_flags &= ~IFA_F_SECONDARY;
432 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
433 		blocking_notifier_call_chain(&inetaddr_chain,
434 				NETDEV_UP, promote);
435 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
436 			if (ifa1->ifa_mask != ifa->ifa_mask ||
437 			    !inet_ifa_match(ifa1->ifa_address, ifa))
438 					continue;
439 			fib_add_ifaddr(ifa);
440 		}
441 
442 	}
443 	if (destroy)
444 		inet_free_ifa(ifa1);
445 }
446 
447 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
448 			 int destroy)
449 {
450 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
451 }
452 
/*
 * Delayed work that runs check_lifetime() to expire or deprecate
 * addresses with a finite lifetime.
 */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
456 
457 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
458 			     u32 portid, struct netlink_ext_ack *extack)
459 {
460 	struct in_device *in_dev = ifa->ifa_dev;
461 	struct in_ifaddr *ifa1, **ifap, **last_primary;
462 	struct in_validator_info ivi;
463 	int ret;
464 
465 	ASSERT_RTNL();
466 
467 	if (!ifa->ifa_local) {
468 		inet_free_ifa(ifa);
469 		return 0;
470 	}
471 
472 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
473 	last_primary = &in_dev->ifa_list;
474 
475 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
476 	     ifap = &ifa1->ifa_next) {
477 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
478 		    ifa->ifa_scope <= ifa1->ifa_scope)
479 			last_primary = &ifa1->ifa_next;
480 		if (ifa1->ifa_mask == ifa->ifa_mask &&
481 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
482 			if (ifa1->ifa_local == ifa->ifa_local) {
483 				inet_free_ifa(ifa);
484 				return -EEXIST;
485 			}
486 			if (ifa1->ifa_scope != ifa->ifa_scope) {
487 				inet_free_ifa(ifa);
488 				return -EINVAL;
489 			}
490 			ifa->ifa_flags |= IFA_F_SECONDARY;
491 		}
492 	}
493 
494 	/* Allow any devices that wish to register ifaddr validtors to weigh
495 	 * in now, before changes are committed.  The rntl lock is serializing
496 	 * access here, so the state should not change between a validator call
497 	 * and a final notify on commit.  This isn't invoked on promotion under
498 	 * the assumption that validators are checking the address itself, and
499 	 * not the flags.
500 	 */
501 	ivi.ivi_addr = ifa->ifa_address;
502 	ivi.ivi_dev = ifa->ifa_dev;
503 	ivi.extack = extack;
504 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
505 					   NETDEV_UP, &ivi);
506 	ret = notifier_to_errno(ret);
507 	if (ret) {
508 		inet_free_ifa(ifa);
509 		return ret;
510 	}
511 
512 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
513 		prandom_seed((__force u32) ifa->ifa_local);
514 		ifap = last_primary;
515 	}
516 
517 	ifa->ifa_next = *ifap;
518 	*ifap = ifa;
519 
520 	inet_hash_insert(dev_net(in_dev->dev), ifa);
521 
522 	cancel_delayed_work(&check_lifetime_work);
523 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
524 
525 	/* Send message first, then call notifier.
526 	   Notifier will trigger FIB update, so that
527 	   listeners of netlink will know about new ifaddr */
528 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
529 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
530 
531 	return 0;
532 }
533 
534 static int inet_insert_ifa(struct in_ifaddr *ifa)
535 {
536 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
537 }
538 
539 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
540 {
541 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
542 
543 	ASSERT_RTNL();
544 
545 	if (!in_dev) {
546 		inet_free_ifa(ifa);
547 		return -ENOBUFS;
548 	}
549 	ipv4_devconf_setall(in_dev);
550 	neigh_parms_data_state_setall(in_dev->arp_parms);
551 	if (ifa->ifa_dev != in_dev) {
552 		WARN_ON(ifa->ifa_dev);
553 		in_dev_hold(in_dev);
554 		ifa->ifa_dev = in_dev;
555 	}
556 	if (ipv4_is_loopback(ifa->ifa_local))
557 		ifa->ifa_scope = RT_SCOPE_HOST;
558 	return inet_insert_ifa(ifa);
559 }
560 
561 /* Caller must hold RCU or RTNL :
562  * We dont take a reference on found in_device
563  */
564 struct in_device *inetdev_by_index(struct net *net, int ifindex)
565 {
566 	struct net_device *dev;
567 	struct in_device *in_dev = NULL;
568 
569 	rcu_read_lock();
570 	dev = dev_get_by_index_rcu(net, ifindex);
571 	if (dev)
572 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
573 	rcu_read_unlock();
574 	return in_dev;
575 }
576 EXPORT_SYMBOL(inetdev_by_index);
577 
578 /* Called only from RTNL semaphored context. No locks. */
579 
580 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
581 				    __be32 mask)
582 {
583 	ASSERT_RTNL();
584 
585 	for_primary_ifa(in_dev) {
586 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
587 			return ifa;
588 	} endfor_ifa(in_dev);
589 	return NULL;
590 }
591 
592 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
593 {
594 	struct ip_mreqn mreq = {
595 		.imr_multiaddr.s_addr = ifa->ifa_address,
596 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
597 	};
598 	int ret;
599 
600 	ASSERT_RTNL();
601 
602 	lock_sock(sk);
603 	if (join)
604 		ret = ip_mc_join_group(sk, &mreq);
605 	else
606 		ret = ip_mc_leave_group(sk, &mreq);
607 	release_sock(sk);
608 
609 	return ret;
610 }
611 
612 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
613 			    struct netlink_ext_ack *extack)
614 {
615 	struct net *net = sock_net(skb->sk);
616 	struct nlattr *tb[IFA_MAX+1];
617 	struct in_device *in_dev;
618 	struct ifaddrmsg *ifm;
619 	struct in_ifaddr *ifa, **ifap;
620 	int err = -EINVAL;
621 
622 	ASSERT_RTNL();
623 
624 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
625 			  extack);
626 	if (err < 0)
627 		goto errout;
628 
629 	ifm = nlmsg_data(nlh);
630 	in_dev = inetdev_by_index(net, ifm->ifa_index);
631 	if (!in_dev) {
632 		err = -ENODEV;
633 		goto errout;
634 	}
635 
636 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
637 	     ifap = &ifa->ifa_next) {
638 		if (tb[IFA_LOCAL] &&
639 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
640 			continue;
641 
642 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
643 			continue;
644 
645 		if (tb[IFA_ADDRESS] &&
646 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
647 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
648 			continue;
649 
650 		if (ipv4_is_multicast(ifa->ifa_address))
651 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
652 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
653 		return 0;
654 	}
655 
656 	err = -EADDRNOTAVAIL;
657 errout:
658 	return err;
659 }
660 
/* Lifetime value (in seconds) meaning "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
662 
663 static void check_lifetime(struct work_struct *work)
664 {
665 	unsigned long now, next, next_sec, next_sched;
666 	struct in_ifaddr *ifa;
667 	struct hlist_node *n;
668 	int i;
669 
670 	now = jiffies;
671 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
672 
673 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
674 		bool change_needed = false;
675 
676 		rcu_read_lock();
677 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
678 			unsigned long age;
679 
680 			if (ifa->ifa_flags & IFA_F_PERMANENT)
681 				continue;
682 
683 			/* We try to batch several events at once. */
684 			age = (now - ifa->ifa_tstamp +
685 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
686 
687 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
688 			    age >= ifa->ifa_valid_lft) {
689 				change_needed = true;
690 			} else if (ifa->ifa_preferred_lft ==
691 				   INFINITY_LIFE_TIME) {
692 				continue;
693 			} else if (age >= ifa->ifa_preferred_lft) {
694 				if (time_before(ifa->ifa_tstamp +
695 						ifa->ifa_valid_lft * HZ, next))
696 					next = ifa->ifa_tstamp +
697 					       ifa->ifa_valid_lft * HZ;
698 
699 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
700 					change_needed = true;
701 			} else if (time_before(ifa->ifa_tstamp +
702 					       ifa->ifa_preferred_lft * HZ,
703 					       next)) {
704 				next = ifa->ifa_tstamp +
705 				       ifa->ifa_preferred_lft * HZ;
706 			}
707 		}
708 		rcu_read_unlock();
709 		if (!change_needed)
710 			continue;
711 		rtnl_lock();
712 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
713 			unsigned long age;
714 
715 			if (ifa->ifa_flags & IFA_F_PERMANENT)
716 				continue;
717 
718 			/* We try to batch several events at once. */
719 			age = (now - ifa->ifa_tstamp +
720 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721 
722 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723 			    age >= ifa->ifa_valid_lft) {
724 				struct in_ifaddr **ifap;
725 
726 				for (ifap = &ifa->ifa_dev->ifa_list;
727 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
728 					if (*ifap == ifa) {
729 						inet_del_ifa(ifa->ifa_dev,
730 							     ifap, 1);
731 						break;
732 					}
733 				}
734 			} else if (ifa->ifa_preferred_lft !=
735 				   INFINITY_LIFE_TIME &&
736 				   age >= ifa->ifa_preferred_lft &&
737 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
738 				ifa->ifa_flags |= IFA_F_DEPRECATED;
739 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
740 			}
741 		}
742 		rtnl_unlock();
743 	}
744 
745 	next_sec = round_jiffies_up(next);
746 	next_sched = next;
747 
748 	/* If rounded timeout is accurate enough, accept it. */
749 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
750 		next_sched = next_sec;
751 
752 	now = jiffies;
753 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
754 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
755 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
756 
757 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
758 			next_sched - now);
759 }
760 
761 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
762 			     __u32 prefered_lft)
763 {
764 	unsigned long timeout;
765 
766 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
767 
768 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
769 	if (addrconf_finite_timeout(timeout))
770 		ifa->ifa_valid_lft = timeout;
771 	else
772 		ifa->ifa_flags |= IFA_F_PERMANENT;
773 
774 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
775 	if (addrconf_finite_timeout(timeout)) {
776 		if (timeout == 0)
777 			ifa->ifa_flags |= IFA_F_DEPRECATED;
778 		ifa->ifa_preferred_lft = timeout;
779 	}
780 	ifa->ifa_tstamp = jiffies;
781 	if (!ifa->ifa_cstamp)
782 		ifa->ifa_cstamp = ifa->ifa_tstamp;
783 }
784 
785 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
786 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
787 				       struct netlink_ext_ack *extack)
788 {
789 	struct nlattr *tb[IFA_MAX+1];
790 	struct in_ifaddr *ifa;
791 	struct ifaddrmsg *ifm;
792 	struct net_device *dev;
793 	struct in_device *in_dev;
794 	int err;
795 
796 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
797 			  extack);
798 	if (err < 0)
799 		goto errout;
800 
801 	ifm = nlmsg_data(nlh);
802 	err = -EINVAL;
803 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
804 		goto errout;
805 
806 	dev = __dev_get_by_index(net, ifm->ifa_index);
807 	err = -ENODEV;
808 	if (!dev)
809 		goto errout;
810 
811 	in_dev = __in_dev_get_rtnl(dev);
812 	err = -ENOBUFS;
813 	if (!in_dev)
814 		goto errout;
815 
816 	ifa = inet_alloc_ifa();
817 	if (!ifa)
818 		/*
819 		 * A potential indev allocation can be left alive, it stays
820 		 * assigned to its device and is destroy with it.
821 		 */
822 		goto errout;
823 
824 	ipv4_devconf_setall(in_dev);
825 	neigh_parms_data_state_setall(in_dev->arp_parms);
826 	in_dev_hold(in_dev);
827 
828 	if (!tb[IFA_ADDRESS])
829 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
830 
831 	INIT_HLIST_NODE(&ifa->hash);
832 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
833 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
834 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
835 					 ifm->ifa_flags;
836 	ifa->ifa_scope = ifm->ifa_scope;
837 	ifa->ifa_dev = in_dev;
838 
839 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
840 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
841 
842 	if (tb[IFA_BROADCAST])
843 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
844 
845 	if (tb[IFA_LABEL])
846 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
847 	else
848 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
849 
850 	if (tb[IFA_RT_PRIORITY])
851 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
852 
853 	if (tb[IFA_CACHEINFO]) {
854 		struct ifa_cacheinfo *ci;
855 
856 		ci = nla_data(tb[IFA_CACHEINFO]);
857 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
858 			err = -EINVAL;
859 			goto errout_free;
860 		}
861 		*pvalid_lft = ci->ifa_valid;
862 		*pprefered_lft = ci->ifa_prefered;
863 	}
864 
865 	return ifa;
866 
867 errout_free:
868 	inet_free_ifa(ifa);
869 errout:
870 	return ERR_PTR(err);
871 }
872 
873 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
874 {
875 	struct in_device *in_dev = ifa->ifa_dev;
876 	struct in_ifaddr *ifa1, **ifap;
877 
878 	if (!ifa->ifa_local)
879 		return NULL;
880 
881 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
882 	     ifap = &ifa1->ifa_next) {
883 		if (ifa1->ifa_mask == ifa->ifa_mask &&
884 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
885 		    ifa1->ifa_local == ifa->ifa_local)
886 			return ifa1;
887 	}
888 	return NULL;
889 }
890 
891 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
892 			    struct netlink_ext_ack *extack)
893 {
894 	struct net *net = sock_net(skb->sk);
895 	struct in_ifaddr *ifa;
896 	struct in_ifaddr *ifa_existing;
897 	__u32 valid_lft = INFINITY_LIFE_TIME;
898 	__u32 prefered_lft = INFINITY_LIFE_TIME;
899 
900 	ASSERT_RTNL();
901 
902 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
903 	if (IS_ERR(ifa))
904 		return PTR_ERR(ifa);
905 
906 	ifa_existing = find_matching_ifa(ifa);
907 	if (!ifa_existing) {
908 		/* It would be best to check for !NLM_F_CREATE here but
909 		 * userspace already relies on not having to provide this.
910 		 */
911 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
912 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
913 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
914 					       true, ifa);
915 
916 			if (ret < 0) {
917 				inet_free_ifa(ifa);
918 				return ret;
919 			}
920 		}
921 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
922 					 extack);
923 	} else {
924 		u32 new_metric = ifa->ifa_rt_priority;
925 
926 		inet_free_ifa(ifa);
927 
928 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
929 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
930 			return -EEXIST;
931 		ifa = ifa_existing;
932 
933 		if (ifa->ifa_rt_priority != new_metric) {
934 			fib_modify_prefix_metric(ifa, new_metric);
935 			ifa->ifa_rt_priority = new_metric;
936 		}
937 
938 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
939 		cancel_delayed_work(&check_lifetime_work);
940 		queue_delayed_work(system_power_efficient_wq,
941 				&check_lifetime_work, 0);
942 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
943 	}
944 	return 0;
945 }
946 
947 /*
948  *	Determine a default network mask, based on the IP address.
949  */
950 
951 static int inet_abc_len(__be32 addr)
952 {
953 	int rc = -1;	/* Something else, probably a multicast. */
954 
955 	if (ipv4_is_zeronet(addr))
956 		rc = 0;
957 	else {
958 		__u32 haddr = ntohl(addr);
959 
960 		if (IN_CLASSA(haddr))
961 			rc = 8;
962 		else if (IN_CLASSB(haddr))
963 			rc = 16;
964 		else if (IN_CLASSC(haddr))
965 			rc = 24;
966 	}
967 
968 	return rc;
969 }
970 
971 
972 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
973 {
974 	struct sockaddr_in sin_orig;
975 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
976 	struct in_device *in_dev;
977 	struct in_ifaddr **ifap = NULL;
978 	struct in_ifaddr *ifa = NULL;
979 	struct net_device *dev;
980 	char *colon;
981 	int ret = -EFAULT;
982 	int tryaddrmatch = 0;
983 
984 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
985 
986 	/* save original address for comparison */
987 	memcpy(&sin_orig, sin, sizeof(*sin));
988 
989 	colon = strchr(ifr->ifr_name, ':');
990 	if (colon)
991 		*colon = 0;
992 
993 	dev_load(net, ifr->ifr_name);
994 
995 	switch (cmd) {
996 	case SIOCGIFADDR:	/* Get interface address */
997 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
998 	case SIOCGIFDSTADDR:	/* Get the destination address */
999 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1000 		/* Note that these ioctls will not sleep,
1001 		   so that we do not impose a lock.
1002 		   One day we will be forced to put shlock here (I mean SMP)
1003 		 */
1004 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1005 		memset(sin, 0, sizeof(*sin));
1006 		sin->sin_family = AF_INET;
1007 		break;
1008 
1009 	case SIOCSIFFLAGS:
1010 		ret = -EPERM;
1011 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1012 			goto out;
1013 		break;
1014 	case SIOCSIFADDR:	/* Set interface address (and family) */
1015 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1016 	case SIOCSIFDSTADDR:	/* Set the destination address */
1017 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1018 		ret = -EPERM;
1019 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1020 			goto out;
1021 		ret = -EINVAL;
1022 		if (sin->sin_family != AF_INET)
1023 			goto out;
1024 		break;
1025 	default:
1026 		ret = -EINVAL;
1027 		goto out;
1028 	}
1029 
1030 	rtnl_lock();
1031 
1032 	ret = -ENODEV;
1033 	dev = __dev_get_by_name(net, ifr->ifr_name);
1034 	if (!dev)
1035 		goto done;
1036 
1037 	if (colon)
1038 		*colon = ':';
1039 
1040 	in_dev = __in_dev_get_rtnl(dev);
1041 	if (in_dev) {
1042 		if (tryaddrmatch) {
1043 			/* Matthias Andree */
1044 			/* compare label and address (4.4BSD style) */
1045 			/* note: we only do this for a limited set of ioctls
1046 			   and only if the original address family was AF_INET.
1047 			   This is checked above. */
1048 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1049 			     ifap = &ifa->ifa_next) {
1050 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1051 				    sin_orig.sin_addr.s_addr ==
1052 							ifa->ifa_local) {
1053 					break; /* found */
1054 				}
1055 			}
1056 		}
1057 		/* we didn't get a match, maybe the application is
1058 		   4.3BSD-style and passed in junk so we fall back to
1059 		   comparing just the label */
1060 		if (!ifa) {
1061 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1062 			     ifap = &ifa->ifa_next)
1063 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1064 					break;
1065 		}
1066 	}
1067 
1068 	ret = -EADDRNOTAVAIL;
1069 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1070 		goto done;
1071 
1072 	switch (cmd) {
1073 	case SIOCGIFADDR:	/* Get interface address */
1074 		ret = 0;
1075 		sin->sin_addr.s_addr = ifa->ifa_local;
1076 		break;
1077 
1078 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1079 		ret = 0;
1080 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1081 		break;
1082 
1083 	case SIOCGIFDSTADDR:	/* Get the destination address */
1084 		ret = 0;
1085 		sin->sin_addr.s_addr = ifa->ifa_address;
1086 		break;
1087 
1088 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1089 		ret = 0;
1090 		sin->sin_addr.s_addr = ifa->ifa_mask;
1091 		break;
1092 
1093 	case SIOCSIFFLAGS:
1094 		if (colon) {
1095 			ret = -EADDRNOTAVAIL;
1096 			if (!ifa)
1097 				break;
1098 			ret = 0;
1099 			if (!(ifr->ifr_flags & IFF_UP))
1100 				inet_del_ifa(in_dev, ifap, 1);
1101 			break;
1102 		}
1103 		ret = dev_change_flags(dev, ifr->ifr_flags);
1104 		break;
1105 
1106 	case SIOCSIFADDR:	/* Set interface address (and family) */
1107 		ret = -EINVAL;
1108 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1109 			break;
1110 
1111 		if (!ifa) {
1112 			ret = -ENOBUFS;
1113 			ifa = inet_alloc_ifa();
1114 			if (!ifa)
1115 				break;
1116 			INIT_HLIST_NODE(&ifa->hash);
1117 			if (colon)
1118 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1119 			else
1120 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1121 		} else {
1122 			ret = 0;
1123 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1124 				break;
1125 			inet_del_ifa(in_dev, ifap, 0);
1126 			ifa->ifa_broadcast = 0;
1127 			ifa->ifa_scope = 0;
1128 		}
1129 
1130 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1131 
1132 		if (!(dev->flags & IFF_POINTOPOINT)) {
1133 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1134 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1135 			if ((dev->flags & IFF_BROADCAST) &&
1136 			    ifa->ifa_prefixlen < 31)
1137 				ifa->ifa_broadcast = ifa->ifa_address |
1138 						     ~ifa->ifa_mask;
1139 		} else {
1140 			ifa->ifa_prefixlen = 32;
1141 			ifa->ifa_mask = inet_make_mask(32);
1142 		}
1143 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1144 		ret = inet_set_ifa(dev, ifa);
1145 		break;
1146 
1147 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1148 		ret = 0;
1149 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1150 			inet_del_ifa(in_dev, ifap, 0);
1151 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1152 			inet_insert_ifa(ifa);
1153 		}
1154 		break;
1155 
1156 	case SIOCSIFDSTADDR:	/* Set the destination address */
1157 		ret = 0;
1158 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1159 			break;
1160 		ret = -EINVAL;
1161 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1162 			break;
1163 		ret = 0;
1164 		inet_del_ifa(in_dev, ifap, 0);
1165 		ifa->ifa_address = sin->sin_addr.s_addr;
1166 		inet_insert_ifa(ifa);
1167 		break;
1168 
1169 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1170 
1171 		/*
1172 		 *	The mask we set must be legal.
1173 		 */
1174 		ret = -EINVAL;
1175 		if (bad_mask(sin->sin_addr.s_addr, 0))
1176 			break;
1177 		ret = 0;
1178 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1179 			__be32 old_mask = ifa->ifa_mask;
1180 			inet_del_ifa(in_dev, ifap, 0);
1181 			ifa->ifa_mask = sin->sin_addr.s_addr;
1182 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1183 
1184 			/* See if current broadcast address matches
1185 			 * with current netmask, then recalculate
1186 			 * the broadcast address. Otherwise it's a
1187 			 * funny address, so don't touch it since
1188 			 * the user seems to know what (s)he's doing...
1189 			 */
1190 			if ((dev->flags & IFF_BROADCAST) &&
1191 			    (ifa->ifa_prefixlen < 31) &&
1192 			    (ifa->ifa_broadcast ==
1193 			     (ifa->ifa_local|~old_mask))) {
1194 				ifa->ifa_broadcast = (ifa->ifa_local |
1195 						      ~sin->sin_addr.s_addr);
1196 			}
1197 			inet_insert_ifa(ifa);
1198 		}
1199 		break;
1200 	}
1201 done:
1202 	rtnl_unlock();
1203 out:
1204 	return ret;
1205 }
1206 
1207 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1208 {
1209 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1210 	struct in_ifaddr *ifa;
1211 	struct ifreq ifr;
1212 	int done = 0;
1213 
1214 	if (WARN_ON(size > sizeof(struct ifreq)))
1215 		goto out;
1216 
1217 	if (!in_dev)
1218 		goto out;
1219 
1220 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1221 		if (!buf) {
1222 			done += size;
1223 			continue;
1224 		}
1225 		if (len < size)
1226 			break;
1227 		memset(&ifr, 0, sizeof(struct ifreq));
1228 		strcpy(ifr.ifr_name, ifa->ifa_label);
1229 
1230 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1231 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1232 								ifa->ifa_local;
1233 
1234 		if (copy_to_user(buf + done, &ifr, size)) {
1235 			done = -EFAULT;
1236 			break;
1237 		}
1238 		len  -= size;
1239 		done += size;
1240 	}
1241 out:
1242 	return done;
1243 }
1244 
1245 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1246 				 int scope)
1247 {
1248 	for_primary_ifa(in_dev) {
1249 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1250 		    ifa->ifa_scope <= scope)
1251 			return ifa->ifa_local;
1252 	} endfor_ifa(in_dev);
1253 
1254 	return 0;
1255 }
1256 
1257 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1258 {
1259 	__be32 addr = 0;
1260 	struct in_device *in_dev;
1261 	struct net *net = dev_net(dev);
1262 	int master_idx;
1263 
1264 	rcu_read_lock();
1265 	in_dev = __in_dev_get_rcu(dev);
1266 	if (!in_dev)
1267 		goto no_in_dev;
1268 
1269 	for_primary_ifa(in_dev) {
1270 		if (ifa->ifa_scope > scope)
1271 			continue;
1272 		if (!dst || inet_ifa_match(dst, ifa)) {
1273 			addr = ifa->ifa_local;
1274 			break;
1275 		}
1276 		if (!addr)
1277 			addr = ifa->ifa_local;
1278 	} endfor_ifa(in_dev);
1279 
1280 	if (addr)
1281 		goto out_unlock;
1282 no_in_dev:
1283 	master_idx = l3mdev_master_ifindex_rcu(dev);
1284 
1285 	/* For VRFs, the VRF device takes the place of the loopback device,
1286 	 * with addresses on it being preferred.  Note in such cases the
1287 	 * loopback device will be among the devices that fail the master_idx
1288 	 * equality check in the loop below.
1289 	 */
1290 	if (master_idx &&
1291 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1292 	    (in_dev = __in_dev_get_rcu(dev))) {
1293 		addr = in_dev_select_addr(in_dev, scope);
1294 		if (addr)
1295 			goto out_unlock;
1296 	}
1297 
1298 	/* Not loopback addresses on loopback should be preferred
1299 	   in this case. It is important that lo is the first interface
1300 	   in dev_base list.
1301 	 */
1302 	for_each_netdev_rcu(net, dev) {
1303 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1304 			continue;
1305 
1306 		in_dev = __in_dev_get_rcu(dev);
1307 		if (!in_dev)
1308 			continue;
1309 
1310 		addr = in_dev_select_addr(in_dev, scope);
1311 		if (addr)
1312 			goto out_unlock;
1313 	}
1314 out_unlock:
1315 	rcu_read_unlock();
1316 	return addr;
1317 }
1318 EXPORT_SYMBOL(inet_select_addr);
1319 
1320 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1321 			      __be32 local, int scope)
1322 {
1323 	int same = 0;
1324 	__be32 addr = 0;
1325 
1326 	for_ifa(in_dev) {
1327 		if (!addr &&
1328 		    (local == ifa->ifa_local || !local) &&
1329 		    ifa->ifa_scope <= scope) {
1330 			addr = ifa->ifa_local;
1331 			if (same)
1332 				break;
1333 		}
1334 		if (!same) {
1335 			same = (!local || inet_ifa_match(local, ifa)) &&
1336 				(!dst || inet_ifa_match(dst, ifa));
1337 			if (same && addr) {
1338 				if (local || !dst)
1339 					break;
1340 				/* Is the selected addr into dst subnet? */
1341 				if (inet_ifa_match(addr, ifa))
1342 					break;
1343 				/* No, then can we use new local src? */
1344 				if (ifa->ifa_scope <= scope) {
1345 					addr = ifa->ifa_local;
1346 					break;
1347 				}
1348 				/* search for large dst subnet for addr */
1349 				same = 0;
1350 			}
1351 		}
1352 	} endfor_ifa(in_dev);
1353 
1354 	return same ? addr : 0;
1355 }
1356 
1357 /*
1358  * Confirm that local IP address exists using wildcards:
1359  * - net: netns to check, cannot be NULL
1360  * - in_dev: only on this interface, NULL=any interface
1361  * - dst: only in the same subnet as dst, 0=any dst
1362  * - local: address, 0=autoselect the local address
1363  * - scope: maximum allowed scope value for the local address
1364  */
1365 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1366 			 __be32 dst, __be32 local, int scope)
1367 {
1368 	__be32 addr = 0;
1369 	struct net_device *dev;
1370 
1371 	if (in_dev)
1372 		return confirm_addr_indev(in_dev, dst, local, scope);
1373 
1374 	rcu_read_lock();
1375 	for_each_netdev_rcu(net, dev) {
1376 		in_dev = __in_dev_get_rcu(dev);
1377 		if (in_dev) {
1378 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1379 			if (addr)
1380 				break;
1381 		}
1382 	}
1383 	rcu_read_unlock();
1384 
1385 	return addr;
1386 }
1387 EXPORT_SYMBOL(inet_confirm_addr);
1388 
1389 /*
1390  *	Device notifier
1391  */
1392 
1393 int register_inetaddr_notifier(struct notifier_block *nb)
1394 {
1395 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1396 }
1397 EXPORT_SYMBOL(register_inetaddr_notifier);
1398 
1399 int unregister_inetaddr_notifier(struct notifier_block *nb)
1400 {
1401 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1402 }
1403 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1404 
1405 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1406 {
1407 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1408 }
1409 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1410 
1411 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1412 {
1413 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1414 	    nb);
1415 }
1416 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1417 
1418 /* Rename ifa_labels for a device name change. Make some effort to preserve
1419  * existing alias numbering and to create unique labels if possible.
1420 */
1421 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1422 {
1423 	struct in_ifaddr *ifa;
1424 	int named = 0;
1425 
1426 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1427 		char old[IFNAMSIZ], *dot;
1428 
1429 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1430 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1431 		if (named++ == 0)
1432 			goto skip;
1433 		dot = strchr(old, ':');
1434 		if (!dot) {
1435 			sprintf(old, ":%d", named);
1436 			dot = old;
1437 		}
1438 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1439 			strcat(ifa->ifa_label, dot);
1440 		else
1441 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1442 skip:
1443 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1444 	}
1445 }
1446 
1447 static bool inetdev_valid_mtu(unsigned int mtu)
1448 {
1449 	return mtu >= IPV4_MIN_MTU;
1450 }
1451 
1452 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1453 					struct in_device *in_dev)
1454 
1455 {
1456 	struct in_ifaddr *ifa;
1457 
1458 	for (ifa = in_dev->ifa_list; ifa;
1459 	     ifa = ifa->ifa_next) {
1460 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1461 			 ifa->ifa_local, dev,
1462 			 ifa->ifa_local, NULL,
1463 			 dev->dev_addr, NULL);
1464 	}
1465 }
1466 
1467 /* Called only under RTNL semaphore */
1468 
1469 static int inetdev_event(struct notifier_block *this, unsigned long event,
1470 			 void *ptr)
1471 {
1472 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1473 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1474 
1475 	ASSERT_RTNL();
1476 
1477 	if (!in_dev) {
1478 		if (event == NETDEV_REGISTER) {
1479 			in_dev = inetdev_init(dev);
1480 			if (IS_ERR(in_dev))
1481 				return notifier_from_errno(PTR_ERR(in_dev));
1482 			if (dev->flags & IFF_LOOPBACK) {
1483 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1484 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1485 			}
1486 		} else if (event == NETDEV_CHANGEMTU) {
1487 			/* Re-enabling IP */
1488 			if (inetdev_valid_mtu(dev->mtu))
1489 				in_dev = inetdev_init(dev);
1490 		}
1491 		goto out;
1492 	}
1493 
1494 	switch (event) {
1495 	case NETDEV_REGISTER:
1496 		pr_debug("%s: bug\n", __func__);
1497 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1498 		break;
1499 	case NETDEV_UP:
1500 		if (!inetdev_valid_mtu(dev->mtu))
1501 			break;
1502 		if (dev->flags & IFF_LOOPBACK) {
1503 			struct in_ifaddr *ifa = inet_alloc_ifa();
1504 
1505 			if (ifa) {
1506 				INIT_HLIST_NODE(&ifa->hash);
1507 				ifa->ifa_local =
1508 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1509 				ifa->ifa_prefixlen = 8;
1510 				ifa->ifa_mask = inet_make_mask(8);
1511 				in_dev_hold(in_dev);
1512 				ifa->ifa_dev = in_dev;
1513 				ifa->ifa_scope = RT_SCOPE_HOST;
1514 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1515 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1516 						 INFINITY_LIFE_TIME);
1517 				ipv4_devconf_setall(in_dev);
1518 				neigh_parms_data_state_setall(in_dev->arp_parms);
1519 				inet_insert_ifa(ifa);
1520 			}
1521 		}
1522 		ip_mc_up(in_dev);
1523 		/* fall through */
1524 	case NETDEV_CHANGEADDR:
1525 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1526 			break;
1527 		/* fall through */
1528 	case NETDEV_NOTIFY_PEERS:
1529 		/* Send gratuitous ARP to notify of link change */
1530 		inetdev_send_gratuitous_arp(dev, in_dev);
1531 		break;
1532 	case NETDEV_DOWN:
1533 		ip_mc_down(in_dev);
1534 		break;
1535 	case NETDEV_PRE_TYPE_CHANGE:
1536 		ip_mc_unmap(in_dev);
1537 		break;
1538 	case NETDEV_POST_TYPE_CHANGE:
1539 		ip_mc_remap(in_dev);
1540 		break;
1541 	case NETDEV_CHANGEMTU:
1542 		if (inetdev_valid_mtu(dev->mtu))
1543 			break;
1544 		/* disable IP when MTU is not enough */
1545 		/* fall through */
1546 	case NETDEV_UNREGISTER:
1547 		inetdev_destroy(in_dev);
1548 		break;
1549 	case NETDEV_CHANGENAME:
1550 		/* Do not notify about label change, this event is
1551 		 * not interesting to applications using netlink.
1552 		 */
1553 		inetdev_changename(dev, in_dev);
1554 
1555 		devinet_sysctl_unregister(in_dev);
1556 		devinet_sysctl_register(in_dev);
1557 		break;
1558 	}
1559 out:
1560 	return NOTIFY_DONE;
1561 }
1562 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1566 
1567 static size_t inet_nlmsg_size(void)
1568 {
1569 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1570 	       + nla_total_size(4) /* IFA_ADDRESS */
1571 	       + nla_total_size(4) /* IFA_LOCAL */
1572 	       + nla_total_size(4) /* IFA_BROADCAST */
1573 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1574 	       + nla_total_size(4)  /* IFA_FLAGS */
1575 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1576 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1577 }
1578 
1579 static inline u32 cstamp_delta(unsigned long cstamp)
1580 {
1581 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1582 }
1583 
1584 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1585 			 unsigned long tstamp, u32 preferred, u32 valid)
1586 {
1587 	struct ifa_cacheinfo ci;
1588 
1589 	ci.cstamp = cstamp_delta(cstamp);
1590 	ci.tstamp = cstamp_delta(tstamp);
1591 	ci.ifa_prefered = preferred;
1592 	ci.ifa_valid = valid;
1593 
1594 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1595 }
1596 
1597 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1598 			    struct inet_fill_args *args)
1599 {
1600 	struct ifaddrmsg *ifm;
1601 	struct nlmsghdr  *nlh;
1602 	u32 preferred, valid;
1603 
1604 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1605 			args->flags);
1606 	if (!nlh)
1607 		return -EMSGSIZE;
1608 
1609 	ifm = nlmsg_data(nlh);
1610 	ifm->ifa_family = AF_INET;
1611 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1612 	ifm->ifa_flags = ifa->ifa_flags;
1613 	ifm->ifa_scope = ifa->ifa_scope;
1614 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1615 
1616 	if (args->netnsid >= 0 &&
1617 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1618 		goto nla_put_failure;
1619 
1620 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1621 		preferred = ifa->ifa_preferred_lft;
1622 		valid = ifa->ifa_valid_lft;
1623 		if (preferred != INFINITY_LIFE_TIME) {
1624 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1625 
1626 			if (preferred > tval)
1627 				preferred -= tval;
1628 			else
1629 				preferred = 0;
1630 			if (valid != INFINITY_LIFE_TIME) {
1631 				if (valid > tval)
1632 					valid -= tval;
1633 				else
1634 					valid = 0;
1635 			}
1636 		}
1637 	} else {
1638 		preferred = INFINITY_LIFE_TIME;
1639 		valid = INFINITY_LIFE_TIME;
1640 	}
1641 	if ((ifa->ifa_address &&
1642 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1643 	    (ifa->ifa_local &&
1644 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1645 	    (ifa->ifa_broadcast &&
1646 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1647 	    (ifa->ifa_label[0] &&
1648 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1649 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1650 	    (ifa->ifa_rt_priority &&
1651 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1652 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1653 			  preferred, valid))
1654 		goto nla_put_failure;
1655 
1656 	nlmsg_end(skb, nlh);
1657 	return 0;
1658 
1659 nla_put_failure:
1660 	nlmsg_cancel(skb, nlh);
1661 	return -EMSGSIZE;
1662 }
1663 
1664 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1665 				      struct inet_fill_args *fillargs,
1666 				      struct net **tgt_net, struct sock *sk,
1667 				      struct netlink_callback *cb)
1668 {
1669 	struct netlink_ext_ack *extack = cb->extack;
1670 	struct nlattr *tb[IFA_MAX+1];
1671 	struct ifaddrmsg *ifm;
1672 	int err, i;
1673 
1674 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1675 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1676 		return -EINVAL;
1677 	}
1678 
1679 	ifm = nlmsg_data(nlh);
1680 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1681 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1682 		return -EINVAL;
1683 	}
1684 
1685 	fillargs->ifindex = ifm->ifa_index;
1686 	if (fillargs->ifindex) {
1687 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1688 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1689 	}
1690 
1691 	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1692 				 ifa_ipv4_policy, extack);
1693 	if (err < 0)
1694 		return err;
1695 
1696 	for (i = 0; i <= IFA_MAX; ++i) {
1697 		if (!tb[i])
1698 			continue;
1699 
1700 		if (i == IFA_TARGET_NETNSID) {
1701 			struct net *net;
1702 
1703 			fillargs->netnsid = nla_get_s32(tb[i]);
1704 
1705 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1706 			if (IS_ERR(net)) {
1707 				fillargs->netnsid = -1;
1708 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1709 				return PTR_ERR(net);
1710 			}
1711 			*tgt_net = net;
1712 		} else {
1713 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1714 			return -EINVAL;
1715 		}
1716 	}
1717 
1718 	return 0;
1719 }
1720 
1721 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1722 			    struct netlink_callback *cb, int s_ip_idx,
1723 			    struct inet_fill_args *fillargs)
1724 {
1725 	struct in_ifaddr *ifa;
1726 	int ip_idx = 0;
1727 	int err;
1728 
1729 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1730 		if (ip_idx < s_ip_idx)
1731 			continue;
1732 
1733 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1734 		if (err < 0)
1735 			goto done;
1736 
1737 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1738 	}
1739 	err = 0;
1740 
1741 done:
1742 	cb->args[2] = ip_idx;
1743 
1744 	return err;
1745 }
1746 
1747 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1748 {
1749 	const struct nlmsghdr *nlh = cb->nlh;
1750 	struct inet_fill_args fillargs = {
1751 		.portid = NETLINK_CB(cb->skb).portid,
1752 		.seq = nlh->nlmsg_seq,
1753 		.event = RTM_NEWADDR,
1754 		.flags = NLM_F_MULTI,
1755 		.netnsid = -1,
1756 	};
1757 	struct net *net = sock_net(skb->sk);
1758 	struct net *tgt_net = net;
1759 	int h, s_h;
1760 	int idx, s_idx;
1761 	int s_ip_idx;
1762 	struct net_device *dev;
1763 	struct in_device *in_dev;
1764 	struct hlist_head *head;
1765 	int err = 0;
1766 
1767 	s_h = cb->args[0];
1768 	s_idx = idx = cb->args[1];
1769 	s_ip_idx = cb->args[2];
1770 
1771 	if (cb->strict_check) {
1772 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1773 						 skb->sk, cb);
1774 		if (err < 0)
1775 			goto put_tgt_net;
1776 
1777 		err = 0;
1778 		if (fillargs.ifindex) {
1779 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1780 			if (!dev) {
1781 				err = -ENODEV;
1782 				goto put_tgt_net;
1783 			}
1784 
1785 			in_dev = __in_dev_get_rtnl(dev);
1786 			if (in_dev) {
1787 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1788 						       &fillargs);
1789 			}
1790 			goto put_tgt_net;
1791 		}
1792 	}
1793 
1794 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1795 		idx = 0;
1796 		head = &tgt_net->dev_index_head[h];
1797 		rcu_read_lock();
1798 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1799 			  tgt_net->dev_base_seq;
1800 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1801 			if (idx < s_idx)
1802 				goto cont;
1803 			if (h > s_h || idx > s_idx)
1804 				s_ip_idx = 0;
1805 			in_dev = __in_dev_get_rcu(dev);
1806 			if (!in_dev)
1807 				goto cont;
1808 
1809 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1810 					       &fillargs);
1811 			if (err < 0) {
1812 				rcu_read_unlock();
1813 				goto done;
1814 			}
1815 cont:
1816 			idx++;
1817 		}
1818 		rcu_read_unlock();
1819 	}
1820 
1821 done:
1822 	cb->args[0] = h;
1823 	cb->args[1] = idx;
1824 put_tgt_net:
1825 	if (fillargs.netnsid >= 0)
1826 		put_net(tgt_net);
1827 
1828 	return err < 0 ? err : skb->len;
1829 }
1830 
1831 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1832 		      u32 portid)
1833 {
1834 	struct inet_fill_args fillargs = {
1835 		.portid = portid,
1836 		.seq = nlh ? nlh->nlmsg_seq : 0,
1837 		.event = event,
1838 		.flags = 0,
1839 		.netnsid = -1,
1840 	};
1841 	struct sk_buff *skb;
1842 	int err = -ENOBUFS;
1843 	struct net *net;
1844 
1845 	net = dev_net(ifa->ifa_dev->dev);
1846 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1847 	if (!skb)
1848 		goto errout;
1849 
1850 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1851 	if (err < 0) {
1852 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1853 		WARN_ON(err == -EMSGSIZE);
1854 		kfree_skb(skb);
1855 		goto errout;
1856 	}
1857 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1858 	return;
1859 errout:
1860 	if (err < 0)
1861 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1862 }
1863 
1864 static size_t inet_get_link_af_size(const struct net_device *dev,
1865 				    u32 ext_filter_mask)
1866 {
1867 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1868 
1869 	if (!in_dev)
1870 		return 0;
1871 
1872 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1873 }
1874 
1875 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1876 			     u32 ext_filter_mask)
1877 {
1878 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1879 	struct nlattr *nla;
1880 	int i;
1881 
1882 	if (!in_dev)
1883 		return -ENODATA;
1884 
1885 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1886 	if (!nla)
1887 		return -EMSGSIZE;
1888 
1889 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1890 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1891 
1892 	return 0;
1893 }
1894 
/* Policy passed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1898 
1899 static int inet_validate_link_af(const struct net_device *dev,
1900 				 const struct nlattr *nla)
1901 {
1902 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1903 	int err, rem;
1904 
1905 	if (dev && !__in_dev_get_rcu(dev))
1906 		return -EAFNOSUPPORT;
1907 
1908 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1909 	if (err < 0)
1910 		return err;
1911 
1912 	if (tb[IFLA_INET_CONF]) {
1913 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1914 			int cfgid = nla_type(a);
1915 
1916 			if (nla_len(a) < 4)
1917 				return -EINVAL;
1918 
1919 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1920 				return -EINVAL;
1921 		}
1922 	}
1923 
1924 	return 0;
1925 }
1926 
1927 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1928 {
1929 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1930 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1931 	int rem;
1932 
1933 	if (!in_dev)
1934 		return -EAFNOSUPPORT;
1935 
1936 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1937 		BUG();
1938 
1939 	if (tb[IFLA_INET_CONF]) {
1940 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1941 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1942 	}
1943 
1944 	return 0;
1945 }
1946 
1947 static int inet_netconf_msgsize_devconf(int type)
1948 {
1949 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1950 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1951 	bool all = false;
1952 
1953 	if (type == NETCONFA_ALL)
1954 		all = true;
1955 
1956 	if (all || type == NETCONFA_FORWARDING)
1957 		size += nla_total_size(4);
1958 	if (all || type == NETCONFA_RP_FILTER)
1959 		size += nla_total_size(4);
1960 	if (all || type == NETCONFA_MC_FORWARDING)
1961 		size += nla_total_size(4);
1962 	if (all || type == NETCONFA_BC_FORWARDING)
1963 		size += nla_total_size(4);
1964 	if (all || type == NETCONFA_PROXY_NEIGH)
1965 		size += nla_total_size(4);
1966 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1967 		size += nla_total_size(4);
1968 
1969 	return size;
1970 }
1971 
1972 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1973 				     struct ipv4_devconf *devconf, u32 portid,
1974 				     u32 seq, int event, unsigned int flags,
1975 				     int type)
1976 {
1977 	struct nlmsghdr  *nlh;
1978 	struct netconfmsg *ncm;
1979 	bool all = false;
1980 
1981 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1982 			flags);
1983 	if (!nlh)
1984 		return -EMSGSIZE;
1985 
1986 	if (type == NETCONFA_ALL)
1987 		all = true;
1988 
1989 	ncm = nlmsg_data(nlh);
1990 	ncm->ncm_family = AF_INET;
1991 
1992 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1993 		goto nla_put_failure;
1994 
1995 	if (!devconf)
1996 		goto out;
1997 
1998 	if ((all || type == NETCONFA_FORWARDING) &&
1999 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2000 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2001 		goto nla_put_failure;
2002 	if ((all || type == NETCONFA_RP_FILTER) &&
2003 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2004 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2005 		goto nla_put_failure;
2006 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2007 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2008 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2009 		goto nla_put_failure;
2010 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2011 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2012 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2013 		goto nla_put_failure;
2014 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2015 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2016 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2017 		goto nla_put_failure;
2018 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2019 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2020 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2021 		goto nla_put_failure;
2022 
2023 out:
2024 	nlmsg_end(skb, nlh);
2025 	return 0;
2026 
2027 nla_put_failure:
2028 	nlmsg_cancel(skb, nlh);
2029 	return -EMSGSIZE;
2030 }
2031 
2032 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2033 				 int ifindex, struct ipv4_devconf *devconf)
2034 {
2035 	struct sk_buff *skb;
2036 	int err = -ENOBUFS;
2037 
2038 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2039 	if (!skb)
2040 		goto errout;
2041 
2042 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2043 					event, 0, type);
2044 	if (err < 0) {
2045 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2046 		WARN_ON(err == -EMSGSIZE);
2047 		kfree_skb(skb);
2048 		goto errout;
2049 	}
2050 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2051 	return;
2052 errout:
2053 	if (err < 0)
2054 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2055 }
2056 
/* Attribute policy passed to nlmsg_parse() by inet_netconf_get_devconf().
 * Each listed attribute carries a length constraint of sizeof(int).
 * NOTE(review): no .type is set, so .len is presumably interpreted as for
 * an unspecified attribute type - confirm against struct nla_policy.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2064 
2065 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2066 				    struct nlmsghdr *nlh,
2067 				    struct netlink_ext_ack *extack)
2068 {
2069 	struct net *net = sock_net(in_skb->sk);
2070 	struct nlattr *tb[NETCONFA_MAX+1];
2071 	struct netconfmsg *ncm;
2072 	struct sk_buff *skb;
2073 	struct ipv4_devconf *devconf;
2074 	struct in_device *in_dev;
2075 	struct net_device *dev;
2076 	int ifindex;
2077 	int err;
2078 
2079 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2080 			  devconf_ipv4_policy, extack);
2081 	if (err < 0)
2082 		goto errout;
2083 
2084 	err = -EINVAL;
2085 	if (!tb[NETCONFA_IFINDEX])
2086 		goto errout;
2087 
2088 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2089 	switch (ifindex) {
2090 	case NETCONFA_IFINDEX_ALL:
2091 		devconf = net->ipv4.devconf_all;
2092 		break;
2093 	case NETCONFA_IFINDEX_DEFAULT:
2094 		devconf = net->ipv4.devconf_dflt;
2095 		break;
2096 	default:
2097 		dev = __dev_get_by_index(net, ifindex);
2098 		if (!dev)
2099 			goto errout;
2100 		in_dev = __in_dev_get_rtnl(dev);
2101 		if (!in_dev)
2102 			goto errout;
2103 		devconf = &in_dev->cnf;
2104 		break;
2105 	}
2106 
2107 	err = -ENOBUFS;
2108 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2109 	if (!skb)
2110 		goto errout;
2111 
2112 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2113 					NETLINK_CB(in_skb).portid,
2114 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2115 					NETCONFA_ALL);
2116 	if (err < 0) {
2117 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2118 		WARN_ON(err == -EMSGSIZE);
2119 		kfree_skb(skb);
2120 		goto errout;
2121 	}
2122 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2123 errout:
2124 	return err;
2125 }
2126 
2127 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2128 				     struct netlink_callback *cb)
2129 {
2130 	const struct nlmsghdr *nlh = cb->nlh;
2131 	struct net *net = sock_net(skb->sk);
2132 	int h, s_h;
2133 	int idx, s_idx;
2134 	struct net_device *dev;
2135 	struct in_device *in_dev;
2136 	struct hlist_head *head;
2137 
2138 	if (cb->strict_check) {
2139 		struct netlink_ext_ack *extack = cb->extack;
2140 		struct netconfmsg *ncm;
2141 
2142 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2143 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2144 			return -EINVAL;
2145 		}
2146 
2147 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2148 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2149 			return -EINVAL;
2150 		}
2151 	}
2152 
2153 	s_h = cb->args[0];
2154 	s_idx = idx = cb->args[1];
2155 
2156 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2157 		idx = 0;
2158 		head = &net->dev_index_head[h];
2159 		rcu_read_lock();
2160 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2161 			  net->dev_base_seq;
2162 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2163 			if (idx < s_idx)
2164 				goto cont;
2165 			in_dev = __in_dev_get_rcu(dev);
2166 			if (!in_dev)
2167 				goto cont;
2168 
2169 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2170 						      &in_dev->cnf,
2171 						      NETLINK_CB(cb->skb).portid,
2172 						      nlh->nlmsg_seq,
2173 						      RTM_NEWNETCONF,
2174 						      NLM_F_MULTI,
2175 						      NETCONFA_ALL) < 0) {
2176 				rcu_read_unlock();
2177 				goto done;
2178 			}
2179 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2180 cont:
2181 			idx++;
2182 		}
2183 		rcu_read_unlock();
2184 	}
2185 	if (h == NETDEV_HASHENTRIES) {
2186 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2187 					      net->ipv4.devconf_all,
2188 					      NETLINK_CB(cb->skb).portid,
2189 					      nlh->nlmsg_seq,
2190 					      RTM_NEWNETCONF, NLM_F_MULTI,
2191 					      NETCONFA_ALL) < 0)
2192 			goto done;
2193 		else
2194 			h++;
2195 	}
2196 	if (h == NETDEV_HASHENTRIES + 1) {
2197 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2198 					      net->ipv4.devconf_dflt,
2199 					      NETLINK_CB(cb->skb).portid,
2200 					      nlh->nlmsg_seq,
2201 					      RTM_NEWNETCONF, NLM_F_MULTI,
2202 					      NETCONFA_ALL) < 0)
2203 			goto done;
2204 		else
2205 			h++;
2206 	}
2207 done:
2208 	cb->args[0] = h;
2209 	cb->args[1] = idx;
2210 
2211 	return skb->len;
2212 }
2213 
2214 #ifdef CONFIG_SYSCTL
2215 
2216 static void devinet_copy_dflt_conf(struct net *net, int i)
2217 {
2218 	struct net_device *dev;
2219 
2220 	rcu_read_lock();
2221 	for_each_netdev_rcu(net, dev) {
2222 		struct in_device *in_dev;
2223 
2224 		in_dev = __in_dev_get_rcu(dev);
2225 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2226 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2227 	}
2228 	rcu_read_unlock();
2229 }
2230 
2231 /* called with RTNL locked */
2232 static void inet_forward_change(struct net *net)
2233 {
2234 	struct net_device *dev;
2235 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2236 
2237 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2238 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2239 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2240 				    NETCONFA_FORWARDING,
2241 				    NETCONFA_IFINDEX_ALL,
2242 				    net->ipv4.devconf_all);
2243 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2244 				    NETCONFA_FORWARDING,
2245 				    NETCONFA_IFINDEX_DEFAULT,
2246 				    net->ipv4.devconf_dflt);
2247 
2248 	for_each_netdev(net, dev) {
2249 		struct in_device *in_dev;
2250 
2251 		if (on)
2252 			dev_disable_lro(dev);
2253 
2254 		in_dev = __in_dev_get_rtnl(dev);
2255 		if (in_dev) {
2256 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2257 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2258 						    NETCONFA_FORWARDING,
2259 						    dev->ifindex, &in_dev->cnf);
2260 		}
2261 	}
2262 }
2263 
2264 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2265 {
2266 	if (cnf == net->ipv4.devconf_dflt)
2267 		return NETCONFA_IFINDEX_DEFAULT;
2268 	else if (cnf == net->ipv4.devconf_all)
2269 		return NETCONFA_IFINDEX_ALL;
2270 	else {
2271 		struct in_device *idev
2272 			= container_of(cnf, struct in_device, cnf);
2273 		return idev->dev->ifindex;
2274 	}
2275 }
2276 
2277 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2278 			     void __user *buffer,
2279 			     size_t *lenp, loff_t *ppos)
2280 {
2281 	int old_value = *(int *)ctl->data;
2282 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2283 	int new_value = *(int *)ctl->data;
2284 
2285 	if (write) {
2286 		struct ipv4_devconf *cnf = ctl->extra1;
2287 		struct net *net = ctl->extra2;
2288 		int i = (int *)ctl->data - cnf->data;
2289 		int ifindex;
2290 
2291 		set_bit(i, cnf->state);
2292 
2293 		if (cnf == net->ipv4.devconf_dflt)
2294 			devinet_copy_dflt_conf(net, i);
2295 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2296 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2297 			if ((new_value == 0) && (old_value != 0))
2298 				rt_cache_flush(net);
2299 
2300 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2301 		    new_value != old_value)
2302 			rt_cache_flush(net);
2303 
2304 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2305 		    new_value != old_value) {
2306 			ifindex = devinet_conf_ifindex(net, cnf);
2307 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2308 						    NETCONFA_RP_FILTER,
2309 						    ifindex, cnf);
2310 		}
2311 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2312 		    new_value != old_value) {
2313 			ifindex = devinet_conf_ifindex(net, cnf);
2314 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2315 						    NETCONFA_PROXY_NEIGH,
2316 						    ifindex, cnf);
2317 		}
2318 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2319 		    new_value != old_value) {
2320 			ifindex = devinet_conf_ifindex(net, cnf);
2321 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2322 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2323 						    ifindex, cnf);
2324 		}
2325 	}
2326 
2327 	return ret;
2328 }
2329 
2330 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2331 				  void __user *buffer,
2332 				  size_t *lenp, loff_t *ppos)
2333 {
2334 	int *valp = ctl->data;
2335 	int val = *valp;
2336 	loff_t pos = *ppos;
2337 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2338 
2339 	if (write && *valp != val) {
2340 		struct net *net = ctl->extra2;
2341 
2342 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2343 			if (!rtnl_trylock()) {
2344 				/* Restore the original values before restarting */
2345 				*valp = val;
2346 				*ppos = pos;
2347 				return restart_syscall();
2348 			}
2349 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2350 				inet_forward_change(net);
2351 			} else {
2352 				struct ipv4_devconf *cnf = ctl->extra1;
2353 				struct in_device *idev =
2354 					container_of(cnf, struct in_device, cnf);
2355 				if (*valp)
2356 					dev_disable_lro(idev->dev);
2357 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358 							    NETCONFA_FORWARDING,
2359 							    idev->dev->ifindex,
2360 							    cnf);
2361 			}
2362 			rtnl_unlock();
2363 			rt_cache_flush(net);
2364 		} else
2365 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2366 						    NETCONFA_FORWARDING,
2367 						    NETCONFA_IFINDEX_DEFAULT,
2368 						    net->ipv4.devconf_dflt);
2369 	}
2370 
2371 	return ret;
2372 }
2373 
2374 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2375 				void __user *buffer,
2376 				size_t *lenp, loff_t *ppos)
2377 {
2378 	int *valp = ctl->data;
2379 	int val = *valp;
2380 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2381 	struct net *net = ctl->extra2;
2382 
2383 	if (write && *valp != val)
2384 		rt_cache_flush(net);
2385 
2386 	return ret;
2387 }
2388 
/*
 * Build one ctl_table entry for devconf attribute @attr.
 *
 * .data and .extra1 refer to the global ipv4_devconf template; the
 * index into data[] is IPV4_DEVCONF_<attr> - 1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, perm, handler) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2411 
2412 static struct devinet_sysctl_table {
2413 	struct ctl_table_header *sysctl_header;
2414 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2415 } devinet_sysctl = {
2416 	.devinet_vars = {
2417 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2418 					     devinet_sysctl_forward),
2419 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2420 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2421 
2422 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2423 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2424 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2425 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2426 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2427 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2428 					"accept_source_route"),
2429 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2430 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2431 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2432 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2433 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2434 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2435 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2436 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2437 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2438 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2439 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2440 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2441 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2442 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2443 					"force_igmp_version"),
2444 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2445 					"igmpv2_unsolicited_report_interval"),
2446 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2447 					"igmpv3_unsolicited_report_interval"),
2448 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2449 					"ignore_routes_with_linkdown"),
2450 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2451 					"drop_gratuitous_arp"),
2452 
2453 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2454 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2455 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2456 					      "promote_secondaries"),
2457 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2458 					      "route_localnet"),
2459 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2460 					      "drop_unicast_in_l2_multicast"),
2461 	},
2462 };
2463 
2464 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2465 				     int ifindex, struct ipv4_devconf *p)
2466 {
2467 	int i;
2468 	struct devinet_sysctl_table *t;
2469 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2470 
2471 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2472 	if (!t)
2473 		goto out;
2474 
2475 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2476 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2477 		t->devinet_vars[i].extra1 = p;
2478 		t->devinet_vars[i].extra2 = net;
2479 	}
2480 
2481 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2482 
2483 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2484 	if (!t->sysctl_header)
2485 		goto free;
2486 
2487 	p->sysctl = t;
2488 
2489 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2490 				    ifindex, p);
2491 	return 0;
2492 
2493 free:
2494 	kfree(t);
2495 out:
2496 	return -ENOBUFS;
2497 }
2498 
2499 static void __devinet_sysctl_unregister(struct net *net,
2500 					struct ipv4_devconf *cnf, int ifindex)
2501 {
2502 	struct devinet_sysctl_table *t = cnf->sysctl;
2503 
2504 	if (t) {
2505 		cnf->sysctl = NULL;
2506 		unregister_net_sysctl_table(t->sysctl_header);
2507 		kfree(t);
2508 	}
2509 
2510 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2511 }
2512 
2513 static int devinet_sysctl_register(struct in_device *idev)
2514 {
2515 	int err;
2516 
2517 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2518 		return -EINVAL;
2519 
2520 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2521 	if (err)
2522 		return err;
2523 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2524 					idev->dev->ifindex, &idev->cnf);
2525 	if (err)
2526 		neigh_sysctl_unregister(idev->arp_parms);
2527 	return err;
2528 }
2529 
2530 static void devinet_sysctl_unregister(struct in_device *idev)
2531 {
2532 	struct net *net = dev_net(idev->dev);
2533 
2534 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2535 	neigh_sysctl_unregister(idev->arp_parms);
2536 }
2537 
2538 static struct ctl_table ctl_forward_entry[] = {
2539 	{
2540 		.procname	= "ip_forward",
2541 		.data		= &ipv4_devconf.data[
2542 					IPV4_DEVCONF_FORWARDING - 1],
2543 		.maxlen		= sizeof(int),
2544 		.mode		= 0644,
2545 		.proc_handler	= devinet_sysctl_forward,
2546 		.extra1		= &ipv4_devconf,
2547 		.extra2		= &init_net,
2548 	},
2549 	{ },
2550 };
2551 #endif
2552 
2553 static __net_init int devinet_init_net(struct net *net)
2554 {
2555 	int err;
2556 	struct ipv4_devconf *all, *dflt;
2557 #ifdef CONFIG_SYSCTL
2558 	struct ctl_table *tbl = ctl_forward_entry;
2559 	struct ctl_table_header *forw_hdr;
2560 #endif
2561 
2562 	err = -ENOMEM;
2563 	all = &ipv4_devconf;
2564 	dflt = &ipv4_devconf_dflt;
2565 
2566 	if (!net_eq(net, &init_net)) {
2567 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2568 		if (!all)
2569 			goto err_alloc_all;
2570 
2571 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2572 		if (!dflt)
2573 			goto err_alloc_dflt;
2574 
2575 #ifdef CONFIG_SYSCTL
2576 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2577 		if (!tbl)
2578 			goto err_alloc_ctl;
2579 
2580 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2581 		tbl[0].extra1 = all;
2582 		tbl[0].extra2 = net;
2583 #endif
2584 	}
2585 
2586 #ifdef CONFIG_SYSCTL
2587 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2588 	if (err < 0)
2589 		goto err_reg_all;
2590 
2591 	err = __devinet_sysctl_register(net, "default",
2592 					NETCONFA_IFINDEX_DEFAULT, dflt);
2593 	if (err < 0)
2594 		goto err_reg_dflt;
2595 
2596 	err = -ENOMEM;
2597 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2598 	if (!forw_hdr)
2599 		goto err_reg_ctl;
2600 	net->ipv4.forw_hdr = forw_hdr;
2601 #endif
2602 
2603 	net->ipv4.devconf_all = all;
2604 	net->ipv4.devconf_dflt = dflt;
2605 	return 0;
2606 
2607 #ifdef CONFIG_SYSCTL
2608 err_reg_ctl:
2609 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2610 err_reg_dflt:
2611 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2612 err_reg_all:
2613 	if (tbl != ctl_forward_entry)
2614 		kfree(tbl);
2615 err_alloc_ctl:
2616 #endif
2617 	if (dflt != &ipv4_devconf_dflt)
2618 		kfree(dflt);
2619 err_alloc_dflt:
2620 	if (all != &ipv4_devconf)
2621 		kfree(all);
2622 err_alloc_all:
2623 	return err;
2624 }
2625 
2626 static __net_exit void devinet_exit_net(struct net *net)
2627 {
2628 #ifdef CONFIG_SYSCTL
2629 	struct ctl_table *tbl;
2630 
2631 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2632 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2633 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2634 				    NETCONFA_IFINDEX_DEFAULT);
2635 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2636 				    NETCONFA_IFINDEX_ALL);
2637 	kfree(tbl);
2638 #endif
2639 	kfree(net->ipv4.devconf_dflt);
2640 	kfree(net->ipv4.devconf_all);
2641 }
2642 
2643 static __net_initdata struct pernet_operations devinet_ops = {
2644 	.init = devinet_init_net,
2645 	.exit = devinet_exit_net,
2646 };
2647 
2648 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2649 	.family		  = AF_INET,
2650 	.fill_link_af	  = inet_fill_link_af,
2651 	.get_link_af_size = inet_get_link_af_size,
2652 	.validate_link_af = inet_validate_link_af,
2653 	.set_link_af	  = inet_set_link_af,
2654 };
2655 
2656 void __init devinet_init(void)
2657 {
2658 	int i;
2659 
2660 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2661 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2662 
2663 	register_pernet_subsys(&devinet_ops);
2664 
2665 	register_gifconf(PF_INET, inet_gifconf);
2666 	register_netdevice_notifier(&ip_netdev_notifier);
2667 
2668 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2669 
2670 	rtnl_af_register(&inet_af_ops);
2671 
2672 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2673 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2674 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2675 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2676 		      inet_netconf_dump_devconf, 0);
2677 }
2678