1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
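/* ipv4_devconf backs the initial namespace's net.ipv4.conf.all settings,
 * while ipv4_devconf_dflt backs net.ipv4.conf.default and is the template
 * copied into every new in_device by inetdev_init() below.
 */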
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
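/* Every configured IPv4 address, in any namespace, is hashed by its
 * ifa_local value into this 256-bucket table.  Entries are added and
 * removed under RTNL (inet_hash_insert()/inet_hash_remove()) and walked
 * under RCU (inet_lookup_ifaddr_rcu()).
 */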
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126 	ASSERT_RTNL();
127 	hlist_del_init_rcu(&ifa->hash);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU or RTNL.
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	ifa = inet_lookup_ifaddr_rcu(net, addr);
145 	if (!ifa) {
146 		struct flowi4 fl4 = { .daddr = addr };
147 		struct fib_result res = { 0 };
148 		struct fib_table *local;
149 
150 		/* Fall back to the FIB local table so that communication
151 		 * over loopback subnets works.
152 		 */
153 		local = fib_get_table(net, RT_TABLE_LOCAL);
154 		if (local &&
155 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
156 		    res.type == RTN_LOCAL)
157 			result = FIB_RES_DEV(res);
158 	} else {
159 		result = ifa->ifa_dev->dev;
160 	}
161 	if (result && devref)
162 		dev_hold(result);
163 	rcu_read_unlock();
164 	return result;
165 }
166 EXPORT_SYMBOL(__ip_dev_find);
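/* Illustrative use only (a sketch, not part of the original file): a caller
 * that passes devref == false must confine all use of the result to an RCU
 * read-side section (or hold RTNL):
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		use(dev);	(use() is a hypothetical helper)
 *	rcu_read_unlock();
 *
 * With devref == true a reference is taken on the returned device and the
 * caller must drop it later with dev_put().
 */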
167 
168 /* called under RCU lock */
169 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170 {
171 	u32 hash = inet_addr_hash(net, addr);
172 	struct in_ifaddr *ifa;
173 
174 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175 		if (ifa->ifa_local == addr &&
176 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
177 			return ifa;
178 
179 	return NULL;
180 }
181 
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183 
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
186 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
187 			 int destroy);
188 #ifdef CONFIG_SYSCTL
189 static int devinet_sysctl_register(struct in_device *idev);
190 static void devinet_sysctl_unregister(struct in_device *idev);
191 #else
192 static int devinet_sysctl_register(struct in_device *idev)
193 {
194 	return 0;
195 }
196 static void devinet_sysctl_unregister(struct in_device *idev)
197 {
198 }
199 #endif
200 
201 /* Locks all the inet devices. */
202 
203 static struct in_ifaddr *inet_alloc_ifa(void)
204 {
205 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
206 }
207 
208 static void inet_rcu_free_ifa(struct rcu_head *head)
209 {
210 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
211 	if (ifa->ifa_dev)
212 		in_dev_put(ifa->ifa_dev);
213 	kfree(ifa);
214 }
215 
216 static void inet_free_ifa(struct in_ifaddr *ifa)
217 {
218 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
219 }
220 
221 void in_dev_finish_destroy(struct in_device *idev)
222 {
223 	struct net_device *dev = idev->dev;
224 
225 	WARN_ON(idev->ifa_list);
226 	WARN_ON(idev->mc_list);
227 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
228 #ifdef NET_REFCNT_DEBUG
229 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
230 #endif
231 	dev_put(dev);
232 	if (!idev->dead)
233 		pr_err("Freeing alive in_device %p\n", idev);
234 	else
235 		kfree(idev);
236 }
237 EXPORT_SYMBOL(in_dev_finish_destroy);
238 
239 static struct in_device *inetdev_init(struct net_device *dev)
240 {
241 	struct in_device *in_dev;
242 	int err = -ENOMEM;
243 
244 	ASSERT_RTNL();
245 
246 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
247 	if (!in_dev)
248 		goto out;
249 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
250 			sizeof(in_dev->cnf));
251 	in_dev->cnf.sysctl = NULL;
252 	in_dev->dev = dev;
253 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
254 	if (!in_dev->arp_parms)
255 		goto out_kfree;
256 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
257 		dev_disable_lro(dev);
258 	/* Reference in_dev->dev */
259 	dev_hold(dev);
260 	/* Account for reference dev->ip_ptr (below) */
261 	refcount_set(&in_dev->refcnt, 1);
262 
263 	err = devinet_sysctl_register(in_dev);
264 	if (err) {
265 		in_dev->dead = 1;
266 		in_dev_put(in_dev);
267 		in_dev = NULL;
268 		goto out;
269 	}
270 	ip_mc_init_dev(in_dev);
271 	if (dev->flags & IFF_UP)
272 		ip_mc_up(in_dev);
273 
274 	/* we can receive as soon as ip_ptr is set -- do this last */
275 	rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277 	return in_dev ?: ERR_PTR(err);
278 out_kfree:
279 	kfree(in_dev);
280 	in_dev = NULL;
281 	goto out;
282 }
283 
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
287 	in_dev_put(idev);
288 }
289 
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292 	struct in_ifaddr *ifa;
293 	struct net_device *dev;
294 
295 	ASSERT_RTNL();
296 
297 	dev = in_dev->dev;
298 
299 	in_dev->dead = 1;
300 
301 	ip_mc_destroy_dev(in_dev);
302 
303 	while ((ifa = in_dev->ifa_list) != NULL) {
304 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305 		inet_free_ifa(ifa);
306 	}
307 
308 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
309 
310 	devinet_sysctl_unregister(in_dev);
311 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312 	arp_ifdown(dev);
313 
314 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316 
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319 	rcu_read_lock();
320 	for_primary_ifa(in_dev) {
321 		if (inet_ifa_match(a, ifa)) {
322 			if (!b || inet_ifa_match(b, ifa)) {
323 				rcu_read_unlock();
324 				return 1;
325 			}
326 		}
327 	} endfor_ifa(in_dev);
328 	rcu_read_unlock();
329 	return 0;
330 }
331 
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 			 int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335 	struct in_ifaddr *promote = NULL;
336 	struct in_ifaddr *ifa, *ifa1 = *ifap;
337 	struct in_ifaddr *last_prim = in_dev->ifa_list;
338 	struct in_ifaddr *prev_prom = NULL;
339 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340 
341 	ASSERT_RTNL();
342 
343 	if (in_dev->dead)
344 		goto no_promotions;
345 
346 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
347 	 * unless alias promotion is set.
348 	 */
349 
350 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352 
353 		while ((ifa = *ifap1) != NULL) {
354 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355 			    ifa1->ifa_scope <= ifa->ifa_scope)
356 				last_prim = ifa;
357 
358 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359 			    ifa1->ifa_mask != ifa->ifa_mask ||
360 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
361 				ifap1 = &ifa->ifa_next;
362 				prev_prom = ifa;
363 				continue;
364 			}
365 
366 			if (!do_promote) {
367 				inet_hash_remove(ifa);
368 				*ifap1 = ifa->ifa_next;
369 
370 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371 				blocking_notifier_call_chain(&inetaddr_chain,
372 						NETDEV_DOWN, ifa);
373 				inet_free_ifa(ifa);
374 			} else {
375 				promote = ifa;
376 				break;
377 			}
378 		}
379 	}
380 
381 	/* On promotion all secondaries from the subnet are changing
382 	 * the primary IP; we must remove all their routes silently
383 	 * and later add them back with the new prefsrc. Do this
384 	 * while all addresses are on the device list.
385 	 */
386 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387 		if (ifa1->ifa_mask == ifa->ifa_mask &&
388 		    inet_ifa_match(ifa1->ifa_address, ifa))
389 			fib_del_ifaddr(ifa, ifa1);
390 	}
391 
392 no_promotions:
393 	/* 2. Unlink it */
394 
395 	*ifap = ifa1->ifa_next;
396 	inet_hash_remove(ifa1);
397 
398 	/* 3. Announce address deletion */
399 
400 	/* Send the message first, then call the notifier.
401 	   At first sight, the FIB update triggered by the notifier
402 	   will refer to an already deleted ifaddr, which could confuse
403 	   netlink listeners. It does not: gated sees
404 	   the route deleted, and if it still thinks the ifaddr
405 	   is valid it will try to restore the deleted routes... Grr.
406 	   So this order is correct.
407 	 */
408 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410 
411 	if (promote) {
412 		struct in_ifaddr *next_sec = promote->ifa_next;
413 
414 		if (prev_prom) {
415 			prev_prom->ifa_next = promote->ifa_next;
416 			promote->ifa_next = last_prim->ifa_next;
417 			last_prim->ifa_next = promote;
418 		}
419 
420 		promote->ifa_flags &= ~IFA_F_SECONDARY;
421 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422 		blocking_notifier_call_chain(&inetaddr_chain,
423 				NETDEV_UP, promote);
424 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425 			if (ifa1->ifa_mask != ifa->ifa_mask ||
426 			    !inet_ifa_match(ifa1->ifa_address, ifa))
427 					continue;
428 			fib_add_ifaddr(ifa);
429 		}
430 
431 	}
432 	if (destroy)
433 		inet_free_ifa(ifa1);
434 }
435 
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437 			 int destroy)
438 {
439 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441 
442 static void check_lifetime(struct work_struct *work);
443 
444 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
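/* check_lifetime() below ages addresses that have finite lifetimes: once the
 * preferred lifetime has elapsed the address is marked IFA_F_DEPRECATED, and
 * once the valid lifetime has elapsed it is deleted.  The work item is
 * (re)queued whenever an address is inserted or replaced.
 */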
445 
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447 			     u32 portid)
448 {
449 	struct in_device *in_dev = ifa->ifa_dev;
450 	struct in_ifaddr *ifa1, **ifap, **last_primary;
451 	struct in_validator_info ivi;
452 	int ret;
453 
454 	ASSERT_RTNL();
455 
456 	if (!ifa->ifa_local) {
457 		inet_free_ifa(ifa);
458 		return 0;
459 	}
460 
461 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
462 	last_primary = &in_dev->ifa_list;
463 
464 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
465 	     ifap = &ifa1->ifa_next) {
466 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
467 		    ifa->ifa_scope <= ifa1->ifa_scope)
468 			last_primary = &ifa1->ifa_next;
469 		if (ifa1->ifa_mask == ifa->ifa_mask &&
470 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
471 			if (ifa1->ifa_local == ifa->ifa_local) {
472 				inet_free_ifa(ifa);
473 				return -EEXIST;
474 			}
475 			if (ifa1->ifa_scope != ifa->ifa_scope) {
476 				inet_free_ifa(ifa);
477 				return -EINVAL;
478 			}
479 			ifa->ifa_flags |= IFA_F_SECONDARY;
480 		}
481 	}
482 
483 	/* Allow any devices that wish to register ifaddr validators to weigh
484 	 * in now, before changes are committed.  The rtnl lock is serializing
485 	 * access here, so the state should not change between a validator call
486 	 * and a final notify on commit.  This isn't invoked on promotion under
487 	 * the assumption that validators are checking the address itself, and
488 	 * not the flags.
489 	 */
490 	ivi.ivi_addr = ifa->ifa_address;
491 	ivi.ivi_dev = ifa->ifa_dev;
492 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
493 					   NETDEV_UP, &ivi);
494 	ret = notifier_to_errno(ret);
495 	if (ret) {
496 		inet_free_ifa(ifa);
497 		return ret;
498 	}
499 
500 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
501 		prandom_seed((__force u32) ifa->ifa_local);
502 		ifap = last_primary;
503 	}
504 
505 	ifa->ifa_next = *ifap;
506 	*ifap = ifa;
507 
508 	inet_hash_insert(dev_net(in_dev->dev), ifa);
509 
510 	cancel_delayed_work(&check_lifetime_work);
511 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
512 
513 	/* Send the message first, then call the notifier.
514 	   The notifier will trigger a FIB update, so that
515 	   netlink listeners will know about the new ifaddr. */
516 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
517 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
518 
519 	return 0;
520 }
521 
522 static int inet_insert_ifa(struct in_ifaddr *ifa)
523 {
524 	return __inet_insert_ifa(ifa, NULL, 0);
525 }
526 
527 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
528 {
529 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
530 
531 	ASSERT_RTNL();
532 
533 	if (!in_dev) {
534 		inet_free_ifa(ifa);
535 		return -ENOBUFS;
536 	}
537 	ipv4_devconf_setall(in_dev);
538 	neigh_parms_data_state_setall(in_dev->arp_parms);
539 	if (ifa->ifa_dev != in_dev) {
540 		WARN_ON(ifa->ifa_dev);
541 		in_dev_hold(in_dev);
542 		ifa->ifa_dev = in_dev;
543 	}
544 	if (ipv4_is_loopback(ifa->ifa_local))
545 		ifa->ifa_scope = RT_SCOPE_HOST;
546 	return inet_insert_ifa(ifa);
547 }
548 
549 /* Caller must hold RCU or RTNL:
550  * we don't take a reference on the found in_device.
551  */
552 struct in_device *inetdev_by_index(struct net *net, int ifindex)
553 {
554 	struct net_device *dev;
555 	struct in_device *in_dev = NULL;
556 
557 	rcu_read_lock();
558 	dev = dev_get_by_index_rcu(net, ifindex);
559 	if (dev)
560 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
561 	rcu_read_unlock();
562 	return in_dev;
563 }
564 EXPORT_SYMBOL(inetdev_by_index);
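/* Illustrative use only (a sketch, not part of the original file): since no
 * reference is taken on the returned in_device, a caller brackets the lookup
 * and every use of the result, e.g.
 *
 *	rcu_read_lock();
 *	in_dev = inetdev_by_index(net, ifindex);
 *	if (in_dev)
 *		... read in_dev fields ...
 *	rcu_read_unlock();
 *
 * or holds RTNL across the whole sequence instead.
 */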
565 
566 /* Called only with the RTNL semaphore held. No extra locks taken. */
567 
568 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
569 				    __be32 mask)
570 {
571 	ASSERT_RTNL();
572 
573 	for_primary_ifa(in_dev) {
574 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
575 			return ifa;
576 	} endfor_ifa(in_dev);
577 	return NULL;
578 }
579 
580 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
581 {
582 	struct ip_mreqn mreq = {
583 		.imr_multiaddr.s_addr = ifa->ifa_address,
584 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
585 	};
586 	int ret;
587 
588 	ASSERT_RTNL();
589 
590 	lock_sock(sk);
591 	if (join)
592 		ret = ip_mc_join_group(sk, &mreq);
593 	else
594 		ret = ip_mc_leave_group(sk, &mreq);
595 	release_sock(sk);
596 
597 	return ret;
598 }
599 
600 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
601 			    struct netlink_ext_ack *extack)
602 {
603 	struct net *net = sock_net(skb->sk);
604 	struct nlattr *tb[IFA_MAX+1];
605 	struct in_device *in_dev;
606 	struct ifaddrmsg *ifm;
607 	struct in_ifaddr *ifa, **ifap;
608 	int err = -EINVAL;
609 
610 	ASSERT_RTNL();
611 
612 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
613 			  extack);
614 	if (err < 0)
615 		goto errout;
616 
617 	ifm = nlmsg_data(nlh);
618 	in_dev = inetdev_by_index(net, ifm->ifa_index);
619 	if (!in_dev) {
620 		err = -ENODEV;
621 		goto errout;
622 	}
623 
624 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
625 	     ifap = &ifa->ifa_next) {
626 		if (tb[IFA_LOCAL] &&
627 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
628 			continue;
629 
630 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
631 			continue;
632 
633 		if (tb[IFA_ADDRESS] &&
634 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
635 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
636 			continue;
637 
638 		if (ipv4_is_multicast(ifa->ifa_address))
639 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
640 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
641 		return 0;
642 	}
643 
644 	err = -EADDRNOTAVAIL;
645 errout:
646 	return err;
647 }
648 
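/* An all-ones lifetime in IFA_CACHEINFO means the address never expires;
 * set_ifa_lifetime() then marks it IFA_F_PERMANENT instead of arming a
 * timeout.
 */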
649 #define INFINITY_LIFE_TIME	0xFFFFFFFF
650 
651 static void check_lifetime(struct work_struct *work)
652 {
653 	unsigned long now, next, next_sec, next_sched;
654 	struct in_ifaddr *ifa;
655 	struct hlist_node *n;
656 	int i;
657 
658 	now = jiffies;
659 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
660 
661 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
662 		bool change_needed = false;
663 
664 		rcu_read_lock();
665 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
666 			unsigned long age;
667 
668 			if (ifa->ifa_flags & IFA_F_PERMANENT)
669 				continue;
670 
671 			/* We try to batch several events at once. */
672 			age = (now - ifa->ifa_tstamp +
673 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
674 
675 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
676 			    age >= ifa->ifa_valid_lft) {
677 				change_needed = true;
678 			} else if (ifa->ifa_preferred_lft ==
679 				   INFINITY_LIFE_TIME) {
680 				continue;
681 			} else if (age >= ifa->ifa_preferred_lft) {
682 				if (time_before(ifa->ifa_tstamp +
683 						ifa->ifa_valid_lft * HZ, next))
684 					next = ifa->ifa_tstamp +
685 					       ifa->ifa_valid_lft * HZ;
686 
687 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
688 					change_needed = true;
689 			} else if (time_before(ifa->ifa_tstamp +
690 					       ifa->ifa_preferred_lft * HZ,
691 					       next)) {
692 				next = ifa->ifa_tstamp +
693 				       ifa->ifa_preferred_lft * HZ;
694 			}
695 		}
696 		rcu_read_unlock();
697 		if (!change_needed)
698 			continue;
699 		rtnl_lock();
700 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
701 			unsigned long age;
702 
703 			if (ifa->ifa_flags & IFA_F_PERMANENT)
704 				continue;
705 
706 			/* We try to batch several events at once. */
707 			age = (now - ifa->ifa_tstamp +
708 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
709 
710 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
711 			    age >= ifa->ifa_valid_lft) {
712 				struct in_ifaddr **ifap;
713 
714 				for (ifap = &ifa->ifa_dev->ifa_list;
715 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
716 					if (*ifap == ifa) {
717 						inet_del_ifa(ifa->ifa_dev,
718 							     ifap, 1);
719 						break;
720 					}
721 				}
722 			} else if (ifa->ifa_preferred_lft !=
723 				   INFINITY_LIFE_TIME &&
724 				   age >= ifa->ifa_preferred_lft &&
725 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
726 				ifa->ifa_flags |= IFA_F_DEPRECATED;
727 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
728 			}
729 		}
730 		rtnl_unlock();
731 	}
732 
733 	next_sec = round_jiffies_up(next);
734 	next_sched = next;
735 
736 	/* If rounded timeout is accurate enough, accept it. */
737 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
738 		next_sched = next_sec;
739 
740 	now = jiffies;
741 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
742 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
743 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
744 
745 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
746 			next_sched - now);
747 }
748 
749 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
750 			     __u32 prefered_lft)
751 {
752 	unsigned long timeout;
753 
754 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
755 
756 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
757 	if (addrconf_finite_timeout(timeout))
758 		ifa->ifa_valid_lft = timeout;
759 	else
760 		ifa->ifa_flags |= IFA_F_PERMANENT;
761 
762 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
763 	if (addrconf_finite_timeout(timeout)) {
764 		if (timeout == 0)
765 			ifa->ifa_flags |= IFA_F_DEPRECATED;
766 		ifa->ifa_preferred_lft = timeout;
767 	}
768 	ifa->ifa_tstamp = jiffies;
769 	if (!ifa->ifa_cstamp)
770 		ifa->ifa_cstamp = ifa->ifa_tstamp;
771 }
772 
773 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
774 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
775 {
776 	struct nlattr *tb[IFA_MAX+1];
777 	struct in_ifaddr *ifa;
778 	struct ifaddrmsg *ifm;
779 	struct net_device *dev;
780 	struct in_device *in_dev;
781 	int err;
782 
783 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
784 			  NULL);
785 	if (err < 0)
786 		goto errout;
787 
788 	ifm = nlmsg_data(nlh);
789 	err = -EINVAL;
790 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
791 		goto errout;
792 
793 	dev = __dev_get_by_index(net, ifm->ifa_index);
794 	err = -ENODEV;
795 	if (!dev)
796 		goto errout;
797 
798 	in_dev = __in_dev_get_rtnl(dev);
799 	err = -ENOBUFS;
800 	if (!in_dev)
801 		goto errout;
802 
803 	ifa = inet_alloc_ifa();
804 	if (!ifa)
805 		/*
806 		 * A potential in_dev allocation can be left alive; it stays
807 		 * assigned to its device and is destroyed with it.
808 		 */
809 		goto errout;
810 
811 	ipv4_devconf_setall(in_dev);
812 	neigh_parms_data_state_setall(in_dev->arp_parms);
813 	in_dev_hold(in_dev);
814 
815 	if (!tb[IFA_ADDRESS])
816 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
817 
818 	INIT_HLIST_NODE(&ifa->hash);
819 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
820 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
821 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
822 					 ifm->ifa_flags;
823 	ifa->ifa_scope = ifm->ifa_scope;
824 	ifa->ifa_dev = in_dev;
825 
826 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
827 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
828 
829 	if (tb[IFA_BROADCAST])
830 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
831 
832 	if (tb[IFA_LABEL])
833 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
834 	else
835 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
836 
837 	if (tb[IFA_CACHEINFO]) {
838 		struct ifa_cacheinfo *ci;
839 
840 		ci = nla_data(tb[IFA_CACHEINFO]);
841 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
842 			err = -EINVAL;
843 			goto errout_free;
844 		}
845 		*pvalid_lft = ci->ifa_valid;
846 		*pprefered_lft = ci->ifa_prefered;
847 	}
848 
849 	return ifa;
850 
851 errout_free:
852 	inet_free_ifa(ifa);
853 errout:
854 	return ERR_PTR(err);
855 }
856 
857 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
858 {
859 	struct in_device *in_dev = ifa->ifa_dev;
860 	struct in_ifaddr *ifa1, **ifap;
861 
862 	if (!ifa->ifa_local)
863 		return NULL;
864 
865 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
866 	     ifap = &ifa1->ifa_next) {
867 		if (ifa1->ifa_mask == ifa->ifa_mask &&
868 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
869 		    ifa1->ifa_local == ifa->ifa_local)
870 			return ifa1;
871 	}
872 	return NULL;
873 }
874 
875 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
876 			    struct netlink_ext_ack *extack)
877 {
878 	struct net *net = sock_net(skb->sk);
879 	struct in_ifaddr *ifa;
880 	struct in_ifaddr *ifa_existing;
881 	__u32 valid_lft = INFINITY_LIFE_TIME;
882 	__u32 prefered_lft = INFINITY_LIFE_TIME;
883 
884 	ASSERT_RTNL();
885 
886 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
887 	if (IS_ERR(ifa))
888 		return PTR_ERR(ifa);
889 
890 	ifa_existing = find_matching_ifa(ifa);
891 	if (!ifa_existing) {
892 		/* It would be best to check for !NLM_F_CREATE here but
893 		 * userspace already relies on not having to provide this.
894 		 */
895 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
896 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
897 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
898 					       true, ifa);
899 
900 			if (ret < 0) {
901 				inet_free_ifa(ifa);
902 				return ret;
903 			}
904 		}
905 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
906 	} else {
907 		inet_free_ifa(ifa);
908 
909 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
910 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
911 			return -EEXIST;
912 		ifa = ifa_existing;
913 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
914 		cancel_delayed_work(&check_lifetime_work);
915 		queue_delayed_work(system_power_efficient_wq,
916 				&check_lifetime_work, 0);
917 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
918 	}
919 	return 0;
920 }
921 
922 /*
923  *	Determine a default network mask, based on the IP address.
924  */
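/*
 *	For example (classful defaults): 10.0.0.1 is class A and yields 8,
 *	172.16.0.1 is class B and yields 16, 192.168.1.1 is class C and
 *	yields 24; 0.0.0.0 yields 0, and a multicast or class E address
 *	yields -1.
 */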
925 
926 static int inet_abc_len(__be32 addr)
927 {
928 	int rc = -1;	/* Something else, probably a multicast. */
929 
930 	if (ipv4_is_zeronet(addr))
931 		rc = 0;
932 	else {
933 		__u32 haddr = ntohl(addr);
934 
935 		if (IN_CLASSA(haddr))
936 			rc = 8;
937 		else if (IN_CLASSB(haddr))
938 			rc = 16;
939 		else if (IN_CLASSC(haddr))
940 			rc = 24;
941 	}
942 
943 	return rc;
944 }
945 
946 
947 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
948 {
949 	struct ifreq ifr;
950 	struct sockaddr_in sin_orig;
951 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
952 	struct in_device *in_dev;
953 	struct in_ifaddr **ifap = NULL;
954 	struct in_ifaddr *ifa = NULL;
955 	struct net_device *dev;
956 	char *colon;
957 	int ret = -EFAULT;
958 	int tryaddrmatch = 0;
959 
960 	/*
961 	 *	Fetch the caller's info block into kernel space
962 	 */
963 
964 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
965 		goto out;
966 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
967 
968 	/* save original address for comparison */
969 	memcpy(&sin_orig, sin, sizeof(*sin));
970 
971 	colon = strchr(ifr.ifr_name, ':');
972 	if (colon)
973 		*colon = 0;
974 
975 	dev_load(net, ifr.ifr_name);
976 
977 	switch (cmd) {
978 	case SIOCGIFADDR:	/* Get interface address */
979 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
980 	case SIOCGIFDSTADDR:	/* Get the destination address */
981 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
982 		/* Note that these ioctls will not sleep,
983 		   so we do not impose a lock.
984 		   One day we will be forced to put a shared lock here (I mean SMP).
985 		 */
986 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
987 		memset(sin, 0, sizeof(*sin));
988 		sin->sin_family = AF_INET;
989 		break;
990 
991 	case SIOCSIFFLAGS:
992 		ret = -EPERM;
993 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
994 			goto out;
995 		break;
996 	case SIOCSIFADDR:	/* Set interface address (and family) */
997 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
998 	case SIOCSIFDSTADDR:	/* Set the destination address */
999 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1000 		ret = -EPERM;
1001 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1002 			goto out;
1003 		ret = -EINVAL;
1004 		if (sin->sin_family != AF_INET)
1005 			goto out;
1006 		break;
1007 	default:
1008 		ret = -EINVAL;
1009 		goto out;
1010 	}
1011 
1012 	rtnl_lock();
1013 
1014 	ret = -ENODEV;
1015 	dev = __dev_get_by_name(net, ifr.ifr_name);
1016 	if (!dev)
1017 		goto done;
1018 
1019 	if (colon)
1020 		*colon = ':';
1021 
1022 	in_dev = __in_dev_get_rtnl(dev);
1023 	if (in_dev) {
1024 		if (tryaddrmatch) {
1025 			/* Matthias Andree */
1026 			/* compare label and address (4.4BSD style) */
1027 			/* note: we only do this for a limited set of ioctls
1028 			   and only if the original address family was AF_INET.
1029 			   This is checked above. */
1030 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1031 			     ifap = &ifa->ifa_next) {
1032 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1033 				    sin_orig.sin_addr.s_addr ==
1034 							ifa->ifa_local) {
1035 					break; /* found */
1036 				}
1037 			}
1038 		}
1039 		/* We didn't get a match; maybe the application is
1040 		   4.3BSD-style and passed in junk, so we fall back to
1041 		   comparing just the label. */
1042 		if (!ifa) {
1043 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1044 			     ifap = &ifa->ifa_next)
1045 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1046 					break;
1047 		}
1048 	}
1049 
1050 	ret = -EADDRNOTAVAIL;
1051 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1052 		goto done;
1053 
1054 	switch (cmd) {
1055 	case SIOCGIFADDR:	/* Get interface address */
1056 		sin->sin_addr.s_addr = ifa->ifa_local;
1057 		goto rarok;
1058 
1059 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1060 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1061 		goto rarok;
1062 
1063 	case SIOCGIFDSTADDR:	/* Get the destination address */
1064 		sin->sin_addr.s_addr = ifa->ifa_address;
1065 		goto rarok;
1066 
1067 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1068 		sin->sin_addr.s_addr = ifa->ifa_mask;
1069 		goto rarok;
1070 
1071 	case SIOCSIFFLAGS:
1072 		if (colon) {
1073 			ret = -EADDRNOTAVAIL;
1074 			if (!ifa)
1075 				break;
1076 			ret = 0;
1077 			if (!(ifr.ifr_flags & IFF_UP))
1078 				inet_del_ifa(in_dev, ifap, 1);
1079 			break;
1080 		}
1081 		ret = dev_change_flags(dev, ifr.ifr_flags);
1082 		break;
1083 
1084 	case SIOCSIFADDR:	/* Set interface address (and family) */
1085 		ret = -EINVAL;
1086 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1087 			break;
1088 
1089 		if (!ifa) {
1090 			ret = -ENOBUFS;
1091 			ifa = inet_alloc_ifa();
1092 			if (!ifa)
1093 				break;
1094 			INIT_HLIST_NODE(&ifa->hash);
1095 			if (colon)
1096 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1097 			else
1098 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1099 		} else {
1100 			ret = 0;
1101 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1102 				break;
1103 			inet_del_ifa(in_dev, ifap, 0);
1104 			ifa->ifa_broadcast = 0;
1105 			ifa->ifa_scope = 0;
1106 		}
1107 
1108 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1109 
1110 		if (!(dev->flags & IFF_POINTOPOINT)) {
1111 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1112 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1113 			if ((dev->flags & IFF_BROADCAST) &&
1114 			    ifa->ifa_prefixlen < 31)
1115 				ifa->ifa_broadcast = ifa->ifa_address |
1116 						     ~ifa->ifa_mask;
1117 		} else {
1118 			ifa->ifa_prefixlen = 32;
1119 			ifa->ifa_mask = inet_make_mask(32);
1120 		}
1121 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1122 		ret = inet_set_ifa(dev, ifa);
1123 		break;
1124 
1125 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1126 		ret = 0;
1127 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1128 			inet_del_ifa(in_dev, ifap, 0);
1129 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1130 			inet_insert_ifa(ifa);
1131 		}
1132 		break;
1133 
1134 	case SIOCSIFDSTADDR:	/* Set the destination address */
1135 		ret = 0;
1136 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1137 			break;
1138 		ret = -EINVAL;
1139 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1140 			break;
1141 		ret = 0;
1142 		inet_del_ifa(in_dev, ifap, 0);
1143 		ifa->ifa_address = sin->sin_addr.s_addr;
1144 		inet_insert_ifa(ifa);
1145 		break;
1146 
1147 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1148 
1149 		/*
1150 		 *	The mask we set must be legal.
1151 		 */
1152 		ret = -EINVAL;
1153 		if (bad_mask(sin->sin_addr.s_addr, 0))
1154 			break;
1155 		ret = 0;
1156 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1157 			__be32 old_mask = ifa->ifa_mask;
1158 			inet_del_ifa(in_dev, ifap, 0);
1159 			ifa->ifa_mask = sin->sin_addr.s_addr;
1160 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1161 
1162 			/* If the current broadcast address matches
1163 			 * the old netmask, recalculate the broadcast
1164 			 * address with the new one. Otherwise it's a
1165 			 * funny address, so don't touch it since
1166 			 * the user seems to know what they're doing...
1167 			 */
1168 			if ((dev->flags & IFF_BROADCAST) &&
1169 			    (ifa->ifa_prefixlen < 31) &&
1170 			    (ifa->ifa_broadcast ==
1171 			     (ifa->ifa_local|~old_mask))) {
1172 				ifa->ifa_broadcast = (ifa->ifa_local |
1173 						      ~sin->sin_addr.s_addr);
1174 			}
1175 			inet_insert_ifa(ifa);
1176 		}
1177 		break;
1178 	}
1179 done:
1180 	rtnl_unlock();
1181 out:
1182 	return ret;
1183 rarok:
1184 	rtnl_unlock();
1185 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1186 	goto out;
1187 }
1188 
1189 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1190 {
1191 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1192 	struct in_ifaddr *ifa;
1193 	struct ifreq ifr;
1194 	int done = 0;
1195 
1196 	if (!in_dev)
1197 		goto out;
1198 
1199 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1200 		if (!buf) {
1201 			done += sizeof(ifr);
1202 			continue;
1203 		}
1204 		if (len < (int) sizeof(ifr))
1205 			break;
1206 		memset(&ifr, 0, sizeof(struct ifreq));
1207 		strcpy(ifr.ifr_name, ifa->ifa_label);
1208 
1209 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1210 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1211 								ifa->ifa_local;
1212 
1213 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1214 			done = -EFAULT;
1215 			break;
1216 		}
1217 		buf  += sizeof(struct ifreq);
1218 		len  -= sizeof(struct ifreq);
1219 		done += sizeof(struct ifreq);
1220 	}
1221 out:
1222 	return done;
1223 }
1224 
1225 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1226 				 int scope)
1227 {
1228 	for_primary_ifa(in_dev) {
1229 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1230 		    ifa->ifa_scope <= scope)
1231 			return ifa->ifa_local;
1232 	} endfor_ifa(in_dev);
1233 
1234 	return 0;
1235 }
1236 
1237 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1238 {
1239 	__be32 addr = 0;
1240 	struct in_device *in_dev;
1241 	struct net *net = dev_net(dev);
1242 	int master_idx;
1243 
1244 	rcu_read_lock();
1245 	in_dev = __in_dev_get_rcu(dev);
1246 	if (!in_dev)
1247 		goto no_in_dev;
1248 
1249 	for_primary_ifa(in_dev) {
1250 		if (ifa->ifa_scope > scope)
1251 			continue;
1252 		if (!dst || inet_ifa_match(dst, ifa)) {
1253 			addr = ifa->ifa_local;
1254 			break;
1255 		}
1256 		if (!addr)
1257 			addr = ifa->ifa_local;
1258 	} endfor_ifa(in_dev);
1259 
1260 	if (addr)
1261 		goto out_unlock;
1262 no_in_dev:
1263 	master_idx = l3mdev_master_ifindex_rcu(dev);
1264 
1265 	/* For VRFs, the VRF device takes the place of the loopback device,
1266 	 * with addresses on it being preferred.  Note in such cases the
1267 	 * loopback device will be among the devices that fail the master_idx
1268 	 * equality check in the loop below.
1269 	 */
1270 	if (master_idx &&
1271 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1272 	    (in_dev = __in_dev_get_rcu(dev))) {
1273 		addr = in_dev_select_addr(in_dev, scope);
1274 		if (addr)
1275 			goto out_unlock;
1276 	}
1277 
1278 	/* Non-loopback addresses on the loopback device should be preferred
1279 	   in this case. It is important that lo is the first interface
1280 	   in the dev_base list.
1281 	 */
1282 	for_each_netdev_rcu(net, dev) {
1283 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1284 			continue;
1285 
1286 		in_dev = __in_dev_get_rcu(dev);
1287 		if (!in_dev)
1288 			continue;
1289 
1290 		addr = in_dev_select_addr(in_dev, scope);
1291 		if (addr)
1292 			goto out_unlock;
1293 	}
1294 out_unlock:
1295 	rcu_read_unlock();
1296 	return addr;
1297 }
1298 EXPORT_SYMBOL(inet_select_addr);
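/* Illustrative use only (a sketch, not part of the original file): a caller
 * picking a source address for traffic to dst out of dev would do e.g.
 *
 *	saddr = inet_select_addr(dev, dst, RT_SCOPE_LINK);
 *
 * preferring a primary address in the same subnet as dst, then any primary
 * address of acceptable scope on dev, then one found on a device sharing the
 * same L3 master (see the loops above).
 */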
1299 
1300 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1301 			      __be32 local, int scope)
1302 {
1303 	int same = 0;
1304 	__be32 addr = 0;
1305 
1306 	for_ifa(in_dev) {
1307 		if (!addr &&
1308 		    (local == ifa->ifa_local || !local) &&
1309 		    ifa->ifa_scope <= scope) {
1310 			addr = ifa->ifa_local;
1311 			if (same)
1312 				break;
1313 		}
1314 		if (!same) {
1315 			same = (!local || inet_ifa_match(local, ifa)) &&
1316 				(!dst || inet_ifa_match(dst, ifa));
1317 			if (same && addr) {
1318 				if (local || !dst)
1319 					break;
1320 				/* Is the selected addr in the dst subnet? */
1321 				if (inet_ifa_match(addr, ifa))
1322 					break;
1323 				/* No, then can we use new local src? */
1324 				if (ifa->ifa_scope <= scope) {
1325 					addr = ifa->ifa_local;
1326 					break;
1327 				}
1328 				/* search for large dst subnet for addr */
1329 				same = 0;
1330 			}
1331 		}
1332 	} endfor_ifa(in_dev);
1333 
1334 	return same ? addr : 0;
1335 }
1336 
1337 /*
1338  * Confirm that local IP address exists using wildcards:
1339  * - net: netns to check, cannot be NULL
1340  * - in_dev: only on this interface, NULL=any interface
1341  * - dst: only in the same subnet as dst, 0=any dst
1342  * - local: address, 0=autoselect the local address
1343  * - scope: maximum allowed scope value for the local address
1344  */
1345 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1346 			 __be32 dst, __be32 local, int scope)
1347 {
1348 	__be32 addr = 0;
1349 	struct net_device *dev;
1350 
1351 	if (in_dev)
1352 		return confirm_addr_indev(in_dev, dst, local, scope);
1353 
1354 	rcu_read_lock();
1355 	for_each_netdev_rcu(net, dev) {
1356 		in_dev = __in_dev_get_rcu(dev);
1357 		if (in_dev) {
1358 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1359 			if (addr)
1360 				break;
1361 		}
1362 	}
1363 	rcu_read_unlock();
1364 
1365 	return addr;
1366 }
1367 EXPORT_SYMBOL(inet_confirm_addr);
1368 
1369 /*
1370  *	Device notifier
1371  */
1372 
1373 int register_inetaddr_notifier(struct notifier_block *nb)
1374 {
1375 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1376 }
1377 EXPORT_SYMBOL(register_inetaddr_notifier);
1378 
1379 int unregister_inetaddr_notifier(struct notifier_block *nb)
1380 {
1381 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1382 }
1383 EXPORT_SYMBOL(unregister_inetaddr_notifier);
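/* Illustrative sketch only (the names below are hypothetical, not part of the
 * original file): a subsystem interested in IPv4 address changes registers a
 * handler on this chain; it is invoked with NETDEV_UP or NETDEV_DOWN and a
 * pointer to the struct in_ifaddr involved:
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		... react to event on ifa ...
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_inetaddr_nb = {
 *		.notifier_call = my_inetaddr_event,
 *	};
 *
 *	register_inetaddr_notifier(&my_inetaddr_nb);
 */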
1384 
1385 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1386 {
1387 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1388 }
1389 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1390 
1391 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1392 {
1393 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1394 	    nb);
1395 }
1396 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1397 
1398 /* Rename ifa_labels for a device name change. Make some effort to preserve
1399  * existing alias numbering and to create unique labels if possible.
1400 */
1401 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1402 {
1403 	struct in_ifaddr *ifa;
1404 	int named = 0;
1405 
1406 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1407 		char old[IFNAMSIZ], *dot;
1408 
1409 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1410 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1411 		if (named++ == 0)
1412 			goto skip;
1413 		dot = strchr(old, ':');
1414 		if (!dot) {
1415 			sprintf(old, ":%d", named);
1416 			dot = old;
1417 		}
1418 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1419 			strcat(ifa->ifa_label, dot);
1420 		else
1421 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1422 skip:
1423 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1424 	}
1425 }
1426 
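/* 68 is the minimum IPv4 MTU required by RFC 791: every module must be able
 * to forward a 68-octet datagram without further fragmentation (a maximal
 * 60-byte header plus an 8-byte fragment).
 */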
1427 static bool inetdev_valid_mtu(unsigned int mtu)
1428 {
1429 	return mtu >= 68;
1430 }
1431 
1432 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1433 					struct in_device *in_dev)
1434 
1435 {
1436 	struct in_ifaddr *ifa;
1437 
1438 	for (ifa = in_dev->ifa_list; ifa;
1439 	     ifa = ifa->ifa_next) {
1440 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1441 			 ifa->ifa_local, dev,
1442 			 ifa->ifa_local, NULL,
1443 			 dev->dev_addr, NULL);
1444 	}
1445 }
1446 
1447 /* Called only under RTNL semaphore */
1448 
1449 static int inetdev_event(struct notifier_block *this, unsigned long event,
1450 			 void *ptr)
1451 {
1452 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1453 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1454 
1455 	ASSERT_RTNL();
1456 
1457 	if (!in_dev) {
1458 		if (event == NETDEV_REGISTER) {
1459 			in_dev = inetdev_init(dev);
1460 			if (IS_ERR(in_dev))
1461 				return notifier_from_errno(PTR_ERR(in_dev));
1462 			if (dev->flags & IFF_LOOPBACK) {
1463 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1464 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1465 			}
1466 		} else if (event == NETDEV_CHANGEMTU) {
1467 			/* Re-enabling IP */
1468 			if (inetdev_valid_mtu(dev->mtu))
1469 				in_dev = inetdev_init(dev);
1470 		}
1471 		goto out;
1472 	}
1473 
1474 	switch (event) {
1475 	case NETDEV_REGISTER:
1476 		pr_debug("%s: bug\n", __func__);
1477 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1478 		break;
1479 	case NETDEV_UP:
1480 		if (!inetdev_valid_mtu(dev->mtu))
1481 			break;
1482 		if (dev->flags & IFF_LOOPBACK) {
1483 			struct in_ifaddr *ifa = inet_alloc_ifa();
1484 
1485 			if (ifa) {
1486 				INIT_HLIST_NODE(&ifa->hash);
1487 				ifa->ifa_local =
1488 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1489 				ifa->ifa_prefixlen = 8;
1490 				ifa->ifa_mask = inet_make_mask(8);
1491 				in_dev_hold(in_dev);
1492 				ifa->ifa_dev = in_dev;
1493 				ifa->ifa_scope = RT_SCOPE_HOST;
1494 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1495 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1496 						 INFINITY_LIFE_TIME);
1497 				ipv4_devconf_setall(in_dev);
1498 				neigh_parms_data_state_setall(in_dev->arp_parms);
1499 				inet_insert_ifa(ifa);
1500 			}
1501 		}
1502 		ip_mc_up(in_dev);
1503 		/* fall through */
1504 	case NETDEV_CHANGEADDR:
1505 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1506 			break;
1507 		/* fall through */
1508 	case NETDEV_NOTIFY_PEERS:
1509 		/* Send gratuitous ARP to notify of link change */
1510 		inetdev_send_gratuitous_arp(dev, in_dev);
1511 		break;
1512 	case NETDEV_DOWN:
1513 		ip_mc_down(in_dev);
1514 		break;
1515 	case NETDEV_PRE_TYPE_CHANGE:
1516 		ip_mc_unmap(in_dev);
1517 		break;
1518 	case NETDEV_POST_TYPE_CHANGE:
1519 		ip_mc_remap(in_dev);
1520 		break;
1521 	case NETDEV_CHANGEMTU:
1522 		if (inetdev_valid_mtu(dev->mtu))
1523 			break;
1524 		/* disable IP when the MTU is too small */
1525 	case NETDEV_UNREGISTER:
1526 		inetdev_destroy(in_dev);
1527 		break;
1528 	case NETDEV_CHANGENAME:
1529 		/* Do not notify about the label change; this event is
1530 		 * not interesting to applications using netlink.
1531 		 */
1532 		inetdev_changename(dev, in_dev);
1533 
1534 		devinet_sysctl_unregister(in_dev);
1535 		devinet_sysctl_register(in_dev);
1536 		break;
1537 	}
1538 out:
1539 	return NOTIFY_DONE;
1540 }
1541 
1542 static struct notifier_block ip_netdev_notifier = {
1543 	.notifier_call = inetdev_event,
1544 };
1545 
1546 static size_t inet_nlmsg_size(void)
1547 {
1548 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1549 	       + nla_total_size(4) /* IFA_ADDRESS */
1550 	       + nla_total_size(4) /* IFA_LOCAL */
1551 	       + nla_total_size(4) /* IFA_BROADCAST */
1552 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1553 	       + nla_total_size(4)  /* IFA_FLAGS */
1554 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1555 }
1556 
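/* Convert a jiffies timestamp into hundredths of a second since boot, the
 * unit used for the cstamp/tstamp fields of struct ifa_cacheinfo reported
 * over netlink.
 */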
1557 static inline u32 cstamp_delta(unsigned long cstamp)
1558 {
1559 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1560 }
1561 
1562 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1563 			 unsigned long tstamp, u32 preferred, u32 valid)
1564 {
1565 	struct ifa_cacheinfo ci;
1566 
1567 	ci.cstamp = cstamp_delta(cstamp);
1568 	ci.tstamp = cstamp_delta(tstamp);
1569 	ci.ifa_prefered = preferred;
1570 	ci.ifa_valid = valid;
1571 
1572 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1573 }
1574 
1575 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1576 			    u32 portid, u32 seq, int event, unsigned int flags)
1577 {
1578 	struct ifaddrmsg *ifm;
1579 	struct nlmsghdr  *nlh;
1580 	u32 preferred, valid;
1581 
1582 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1583 	if (!nlh)
1584 		return -EMSGSIZE;
1585 
1586 	ifm = nlmsg_data(nlh);
1587 	ifm->ifa_family = AF_INET;
1588 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1589 	ifm->ifa_flags = ifa->ifa_flags;
1590 	ifm->ifa_scope = ifa->ifa_scope;
1591 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1592 
1593 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1594 		preferred = ifa->ifa_preferred_lft;
1595 		valid = ifa->ifa_valid_lft;
1596 		if (preferred != INFINITY_LIFE_TIME) {
1597 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1598 
1599 			if (preferred > tval)
1600 				preferred -= tval;
1601 			else
1602 				preferred = 0;
1603 			if (valid != INFINITY_LIFE_TIME) {
1604 				if (valid > tval)
1605 					valid -= tval;
1606 				else
1607 					valid = 0;
1608 			}
1609 		}
1610 	} else {
1611 		preferred = INFINITY_LIFE_TIME;
1612 		valid = INFINITY_LIFE_TIME;
1613 	}
1614 	if ((ifa->ifa_address &&
1615 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1616 	    (ifa->ifa_local &&
1617 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1618 	    (ifa->ifa_broadcast &&
1619 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1620 	    (ifa->ifa_label[0] &&
1621 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1622 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1623 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1624 			  preferred, valid))
1625 		goto nla_put_failure;
1626 
1627 	nlmsg_end(skb, nlh);
1628 	return 0;
1629 
1630 nla_put_failure:
1631 	nlmsg_cancel(skb, nlh);
1632 	return -EMSGSIZE;
1633 }
1634 
1635 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1636 {
1637 	struct net *net = sock_net(skb->sk);
1638 	int h, s_h;
1639 	int idx, s_idx;
1640 	int ip_idx, s_ip_idx;
1641 	struct net_device *dev;
1642 	struct in_device *in_dev;
1643 	struct in_ifaddr *ifa;
1644 	struct hlist_head *head;
1645 
1646 	s_h = cb->args[0];
1647 	s_idx = idx = cb->args[1];
1648 	s_ip_idx = ip_idx = cb->args[2];
1649 
1650 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1651 		idx = 0;
1652 		head = &net->dev_index_head[h];
1653 		rcu_read_lock();
1654 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1655 			  net->dev_base_seq;
1656 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1657 			if (idx < s_idx)
1658 				goto cont;
1659 			if (h > s_h || idx > s_idx)
1660 				s_ip_idx = 0;
1661 			in_dev = __in_dev_get_rcu(dev);
1662 			if (!in_dev)
1663 				goto cont;
1664 
1665 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1666 			     ifa = ifa->ifa_next, ip_idx++) {
1667 				if (ip_idx < s_ip_idx)
1668 					continue;
1669 				if (inet_fill_ifaddr(skb, ifa,
1670 					     NETLINK_CB(cb->skb).portid,
1671 					     cb->nlh->nlmsg_seq,
1672 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1673 					rcu_read_unlock();
1674 					goto done;
1675 				}
1676 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1677 			}
1678 cont:
1679 			idx++;
1680 		}
1681 		rcu_read_unlock();
1682 	}
1683 
1684 done:
1685 	cb->args[0] = h;
1686 	cb->args[1] = idx;
1687 	cb->args[2] = ip_idx;
1688 
1689 	return skb->len;
1690 }
1691 
1692 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1693 		      u32 portid)
1694 {
1695 	struct sk_buff *skb;
1696 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1697 	int err = -ENOBUFS;
1698 	struct net *net;
1699 
1700 	net = dev_net(ifa->ifa_dev->dev);
1701 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1702 	if (!skb)
1703 		goto errout;
1704 
1705 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1706 	if (err < 0) {
1707 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1708 		WARN_ON(err == -EMSGSIZE);
1709 		kfree_skb(skb);
1710 		goto errout;
1711 	}
1712 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1713 	return;
1714 errout:
1715 	if (err < 0)
1716 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1717 }
1718 
1719 static size_t inet_get_link_af_size(const struct net_device *dev,
1720 				    u32 ext_filter_mask)
1721 {
1722 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1723 
1724 	if (!in_dev)
1725 		return 0;
1726 
1727 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1728 }
1729 
1730 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1731 			     u32 ext_filter_mask)
1732 {
1733 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1734 	struct nlattr *nla;
1735 	int i;
1736 
1737 	if (!in_dev)
1738 		return -ENODATA;
1739 
1740 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1741 	if (!nla)
1742 		return -EMSGSIZE;
1743 
1744 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1745 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1746 
1747 	return 0;
1748 }
1749 
1750 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1751 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1752 };
1753 
1754 static int inet_validate_link_af(const struct net_device *dev,
1755 				 const struct nlattr *nla)
1756 {
1757 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1758 	int err, rem;
1759 
1760 	if (dev && !__in_dev_get_rtnl(dev))
1761 		return -EAFNOSUPPORT;
1762 
1763 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1764 	if (err < 0)
1765 		return err;
1766 
1767 	if (tb[IFLA_INET_CONF]) {
1768 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1769 			int cfgid = nla_type(a);
1770 
1771 			if (nla_len(a) < 4)
1772 				return -EINVAL;
1773 
1774 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1775 				return -EINVAL;
1776 		}
1777 	}
1778 
1779 	return 0;
1780 }
1781 
1782 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1783 {
1784 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1785 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1786 	int rem;
1787 
1788 	if (!in_dev)
1789 		return -EAFNOSUPPORT;
1790 
1791 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1792 		BUG();
1793 
1794 	if (tb[IFLA_INET_CONF]) {
1795 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1796 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1797 	}
1798 
1799 	return 0;
1800 }
1801 
1802 static int inet_netconf_msgsize_devconf(int type)
1803 {
1804 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1805 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1806 	bool all = false;
1807 
1808 	if (type == NETCONFA_ALL)
1809 		all = true;
1810 
1811 	if (all || type == NETCONFA_FORWARDING)
1812 		size += nla_total_size(4);
1813 	if (all || type == NETCONFA_RP_FILTER)
1814 		size += nla_total_size(4);
1815 	if (all || type == NETCONFA_MC_FORWARDING)
1816 		size += nla_total_size(4);
1817 	if (all || type == NETCONFA_PROXY_NEIGH)
1818 		size += nla_total_size(4);
1819 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1820 		size += nla_total_size(4);
1821 
1822 	return size;
1823 }
1824 
1825 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1826 				     struct ipv4_devconf *devconf, u32 portid,
1827 				     u32 seq, int event, unsigned int flags,
1828 				     int type)
1829 {
1830 	struct nlmsghdr  *nlh;
1831 	struct netconfmsg *ncm;
1832 	bool all = false;
1833 
1834 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1835 			flags);
1836 	if (!nlh)
1837 		return -EMSGSIZE;
1838 
1839 	if (type == NETCONFA_ALL)
1840 		all = true;
1841 
1842 	ncm = nlmsg_data(nlh);
1843 	ncm->ncm_family = AF_INET;
1844 
1845 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1846 		goto nla_put_failure;
1847 
1848 	if (!devconf)
1849 		goto out;
1850 
1851 	if ((all || type == NETCONFA_FORWARDING) &&
1852 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1853 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1854 		goto nla_put_failure;
1855 	if ((all || type == NETCONFA_RP_FILTER) &&
1856 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1857 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1858 		goto nla_put_failure;
1859 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1860 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1861 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1862 		goto nla_put_failure;
1863 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1864 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1865 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1866 		goto nla_put_failure;
1867 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1868 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1869 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1870 		goto nla_put_failure;
1871 
1872 out:
1873 	nlmsg_end(skb, nlh);
1874 	return 0;
1875 
1876 nla_put_failure:
1877 	nlmsg_cancel(skb, nlh);
1878 	return -EMSGSIZE;
1879 }
1880 
1881 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1882 				 int ifindex, struct ipv4_devconf *devconf)
1883 {
1884 	struct sk_buff *skb;
1885 	int err = -ENOBUFS;
1886 
1887 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1888 	if (!skb)
1889 		goto errout;
1890 
1891 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1892 					event, 0, type);
1893 	if (err < 0) {
1894 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1895 		WARN_ON(err == -EMSGSIZE);
1896 		kfree_skb(skb);
1897 		goto errout;
1898 	}
1899 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1900 	return;
1901 errout:
1902 	if (err < 0)
1903 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1904 }
1905 
1906 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1907 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1908 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1909 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1910 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1911 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1912 };
1913 
1914 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1915 				    struct nlmsghdr *nlh,
1916 				    struct netlink_ext_ack *extack)
1917 {
1918 	struct net *net = sock_net(in_skb->sk);
1919 	struct nlattr *tb[NETCONFA_MAX+1];
1920 	struct netconfmsg *ncm;
1921 	struct sk_buff *skb;
1922 	struct ipv4_devconf *devconf;
1923 	struct in_device *in_dev;
1924 	struct net_device *dev;
1925 	int ifindex;
1926 	int err;
1927 
1928 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1929 			  devconf_ipv4_policy, extack);
1930 	if (err < 0)
1931 		goto errout;
1932 
1933 	err = -EINVAL;
1934 	if (!tb[NETCONFA_IFINDEX])
1935 		goto errout;
1936 
1937 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1938 	switch (ifindex) {
1939 	case NETCONFA_IFINDEX_ALL:
1940 		devconf = net->ipv4.devconf_all;
1941 		break;
1942 	case NETCONFA_IFINDEX_DEFAULT:
1943 		devconf = net->ipv4.devconf_dflt;
1944 		break;
1945 	default:
1946 		dev = __dev_get_by_index(net, ifindex);
1947 		if (!dev)
1948 			goto errout;
1949 		in_dev = __in_dev_get_rtnl(dev);
1950 		if (!in_dev)
1951 			goto errout;
1952 		devconf = &in_dev->cnf;
1953 		break;
1954 	}
1955 
1956 	err = -ENOBUFS;
1957 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1958 	if (!skb)
1959 		goto errout;
1960 
1961 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1962 					NETLINK_CB(in_skb).portid,
1963 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1964 					NETCONFA_ALL);
1965 	if (err < 0) {
1966 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1967 		WARN_ON(err == -EMSGSIZE);
1968 		kfree_skb(skb);
1969 		goto errout;
1970 	}
1971 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1972 errout:
1973 	return err;
1974 }
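/*
 * Illustration only (not part of this translation unit): a userspace
 * sketch of the request this handler serves, querying one interface's
 * IPv4 devconf over rtnetlink.  "eth0" is a hypothetical interface
 * name and error handling is omitted.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <net/if.h>
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/rtnetlink.h>
 *	#include <linux/netconf.h>
 *
 *	int main(void)
 *	{
 *		struct {
 *			struct nlmsghdr   nlh;
 *			struct netconfmsg ncm;
 *			char              buf[64];
 *		} req = {
 *			.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct netconfmsg)),
 *			.nlh.nlmsg_type  = RTM_GETNETCONF,
 *			.nlh.nlmsg_flags = NLM_F_REQUEST,
 *			.ncm.ncm_family  = AF_INET,
 *		};
 *		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *		int ifindex = if_nametoindex("eth0");
 *		struct nlattr *nla;
 *		char ans[4096];
 *		int len;
 *
 *		// append NETCONFA_IFINDEX at the NLMSG_ALIGNed tail
 *		nla = (struct nlattr *)((char *)&req +
 *					NLMSG_ALIGN(req.nlh.nlmsg_len));
 *		nla->nla_type = NETCONFA_IFINDEX;
 *		nla->nla_len  = NLA_HDRLEN + sizeof(ifindex);
 *		memcpy((char *)nla + NLA_HDRLEN, &ifindex, sizeof(ifindex));
 *		req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) +
 *				    NLA_ALIGN(nla->nla_len);
 *
 *		send(fd, &req, req.nlh.nlmsg_len, 0);
 *		len = recv(fd, ans, sizeof(ans), 0);	// RTM_NEWNETCONF reply
 *		printf("reply: %d bytes\n", len);
 *		close(fd);
 *		return 0;
 *	}
 */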
1975 
1976 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1977 				     struct netlink_callback *cb)
1978 {
1979 	struct net *net = sock_net(skb->sk);
1980 	int h, s_h;
1981 	int idx, s_idx;
1982 	struct net_device *dev;
1983 	struct in_device *in_dev;
1984 	struct hlist_head *head;
1985 
1986 	s_h = cb->args[0];
1987 	s_idx = idx = cb->args[1];
1988 
1989 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1990 		idx = 0;
1991 		head = &net->dev_index_head[h];
1992 		rcu_read_lock();
1993 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1994 			  net->dev_base_seq;
1995 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1996 			if (idx < s_idx)
1997 				goto cont;
1998 			in_dev = __in_dev_get_rcu(dev);
1999 			if (!in_dev)
2000 				goto cont;
2001 
2002 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2003 						      &in_dev->cnf,
2004 						      NETLINK_CB(cb->skb).portid,
2005 						      cb->nlh->nlmsg_seq,
2006 						      RTM_NEWNETCONF,
2007 						      NLM_F_MULTI,
2008 						      NETCONFA_ALL) < 0) {
2009 				rcu_read_unlock();
2010 				goto done;
2011 			}
2012 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2013 cont:
2014 			idx++;
2015 		}
2016 		rcu_read_unlock();
2017 	}
2018 	if (h == NETDEV_HASHENTRIES) {
2019 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2020 					      net->ipv4.devconf_all,
2021 					      NETLINK_CB(cb->skb).portid,
2022 					      cb->nlh->nlmsg_seq,
2023 					      RTM_NEWNETCONF, NLM_F_MULTI,
2024 					      NETCONFA_ALL) < 0)
2025 			goto done;
2026 		else
2027 			h++;
2028 	}
2029 	if (h == NETDEV_HASHENTRIES + 1) {
2030 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2031 					      net->ipv4.devconf_dflt,
2032 					      NETLINK_CB(cb->skb).portid,
2033 					      cb->nlh->nlmsg_seq,
2034 					      RTM_NEWNETCONF, NLM_F_MULTI,
2035 					      NETCONFA_ALL) < 0)
2036 			goto done;
2037 		else
2038 			h++;
2039 	}
2040 done:
2041 	cb->args[0] = h;
2042 	cb->args[1] = idx;
2043 
2044 	return skb->len;
2045 }
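/*
 * Dump state lives in cb->args[]: args[0] is the dev_index_head bucket,
 * extended by two virtual buckets (NETDEV_HASHENTRIES for "all" and
 * NETDEV_HASHENTRIES + 1 for "default"), and args[1] is the index
 * within the bucket, so an interrupted dump resumes where the previous
 * partial skb left off.
 */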
2046 
2047 #ifdef CONFIG_SYSCTL
2048 
2049 static void devinet_copy_dflt_conf(struct net *net, int i)
2050 {
2051 	struct net_device *dev;
2052 
2053 	rcu_read_lock();
2054 	for_each_netdev_rcu(net, dev) {
2055 		struct in_device *in_dev;
2056 
2057 		in_dev = __in_dev_get_rcu(dev);
2058 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2059 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2060 	}
2061 	rcu_read_unlock();
2062 }
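/*
 * Only entries whose bit in cnf.state is still clear are copied, i.e. a
 * value that was explicitly written for a device (see the set_bit() in
 * devinet_conf_proc()) is not clobbered when the per-namespace default
 * changes.
 */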
2063 
2064 /* called with RTNL locked */
2065 static void inet_forward_change(struct net *net)
2066 {
2067 	struct net_device *dev;
2068 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2069 
2070 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2071 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2072 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2073 				    NETCONFA_FORWARDING,
2074 				    NETCONFA_IFINDEX_ALL,
2075 				    net->ipv4.devconf_all);
2076 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2077 				    NETCONFA_FORWARDING,
2078 				    NETCONFA_IFINDEX_DEFAULT,
2079 				    net->ipv4.devconf_dflt);
2080 
2081 	for_each_netdev(net, dev) {
2082 		struct in_device *in_dev;
2083 
2084 		if (on)
2085 			dev_disable_lro(dev);
2086 
2087 		in_dev = __in_dev_get_rtnl(dev);
2088 		if (in_dev) {
2089 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2090 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2091 						    NETCONFA_FORWARDING,
2092 						    dev->ifindex, &in_dev->cnf);
2093 		}
2094 	}
2095 }
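/*
 * User-visible effect, as a sketch (illustration only; needs root and
 * changes live configuration): writing conf/all/forwarding funnels
 * through devinet_sysctl_forward() into the function above, after which
 * the "default" entry and every per-device "forwarding" entry report
 * the new value.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		FILE *f = fopen("/proc/sys/net/ipv4/conf/all/forwarding", "w");
 *		char line[16];
 *
 *		if (!f)
 *			return 1;
 *		fputs("1", f);		// handled by devinet_sysctl_forward()
 *		fclose(f);
 *
 *		f = fopen("/proc/sys/net/ipv4/conf/default/forwarding", "r");
 *		if (f && fgets(line, sizeof(line), f))
 *			printf("default/forwarding is now %s", line);
 *		if (f)
 *			fclose(f);
 *		return 0;
 *	}
 */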
2096 
2097 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2098 {
2099 	if (cnf == net->ipv4.devconf_dflt)
2100 		return NETCONFA_IFINDEX_DEFAULT;
2101 	else if (cnf == net->ipv4.devconf_all)
2102 		return NETCONFA_IFINDEX_ALL;
2103 	else {
2104 		struct in_device *idev
2105 			= container_of(cnf, struct in_device, cnf);
2106 		return idev->dev->ifindex;
2107 	}
2108 }
2109 
2110 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2111 			     void __user *buffer,
2112 			     size_t *lenp, loff_t *ppos)
2113 {
2114 	int old_value = *(int *)ctl->data;
2115 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2116 	int new_value = *(int *)ctl->data;
2117 
2118 	if (write) {
2119 		struct ipv4_devconf *cnf = ctl->extra1;
2120 		struct net *net = ctl->extra2;
2121 		int i = (int *)ctl->data - cnf->data;
2122 		int ifindex;
2123 
2124 		set_bit(i, cnf->state);
2125 
2126 		if (cnf == net->ipv4.devconf_dflt)
2127 			devinet_copy_dflt_conf(net, i);
2128 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2129 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2130 			if ((new_value == 0) && (old_value != 0))
2131 				rt_cache_flush(net);
2132 
2133 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2134 		    new_value != old_value) {
2135 			ifindex = devinet_conf_ifindex(net, cnf);
2136 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2137 						    NETCONFA_RP_FILTER,
2138 						    ifindex, cnf);
2139 		}
2140 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2141 		    new_value != old_value) {
2142 			ifindex = devinet_conf_ifindex(net, cnf);
2143 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2144 						    NETCONFA_PROXY_NEIGH,
2145 						    ifindex, cnf);
2146 		}
2147 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2148 		    new_value != old_value) {
2149 			ifindex = devinet_conf_ifindex(net, cnf);
2150 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2151 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2152 						    ifindex, cnf);
2153 		}
2154 	}
2155 
2156 	return ret;
2157 }
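/*
 * Besides storing the value, a write above marks the entry as
 * explicitly set (set_bit() into cnf->state), pushes a changed default
 * down to devices that never overrode it, flushes the route cache when
 * accept_local or route_localnet is switched off, and emits
 * RTM_NEWNETCONF for the options that are also exported via the
 * netconf netlink messages.
 */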
2158 
2159 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2160 				  void __user *buffer,
2161 				  size_t *lenp, loff_t *ppos)
2162 {
2163 	int *valp = ctl->data;
2164 	int val = *valp;
2165 	loff_t pos = *ppos;
2166 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2167 
2168 	if (write && *valp != val) {
2169 		struct net *net = ctl->extra2;
2170 
2171 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2172 			if (!rtnl_trylock()) {
2173 				/* Restore the original values before restarting */
2174 				*valp = val;
2175 				*ppos = pos;
2176 				return restart_syscall();
2177 			}
2178 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2179 				inet_forward_change(net);
2180 			} else {
2181 				struct ipv4_devconf *cnf = ctl->extra1;
2182 				struct in_device *idev =
2183 					container_of(cnf, struct in_device, cnf);
2184 				if (*valp)
2185 					dev_disable_lro(idev->dev);
2186 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2187 							    NETCONFA_FORWARDING,
2188 							    idev->dev->ifindex,
2189 							    cnf);
2190 			}
2191 			rtnl_unlock();
2192 			rt_cache_flush(net);
2193 		} else
2194 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2195 						    NETCONFA_FORWARDING,
2196 						    NETCONFA_IFINDEX_DEFAULT,
2197 						    net->ipv4.devconf_dflt);
2198 	}
2199 
2200 	return ret;
2201 }
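/*
 * rtnl_trylock()/restart_syscall() above avoids deadlocking against an
 * RTNL holder; the original *valp and *ppos are restored so the
 * restarted write re-runs proc_dointvec() from scratch.  A write to
 * "all/forwarding" fans out through inet_forward_change(), a per-device
 * write disables LRO and notifies for that device only, and the
 * "default" entry just sends the netconf notification.
 */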
2202 
2203 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2204 				void __user *buffer,
2205 				size_t *lenp, loff_t *ppos)
2206 {
2207 	int *valp = ctl->data;
2208 	int val = *valp;
2209 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2210 	struct net *net = ctl->extra2;
2211 
2212 	if (write && *valp != val)
2213 		rt_cache_flush(net);
2214 
2215 	return ret;
2216 }
2217 
2218 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2219 	{ \
2220 		.procname	= name, \
2221 		.data		= ipv4_devconf.data + \
2222 				  IPV4_DEVCONF_ ## attr - 1, \
2223 		.maxlen		= sizeof(int), \
2224 		.mode		= mval, \
2225 		.proc_handler	= proc, \
2226 		.extra1		= &ipv4_devconf, \
2227 	}
2228 
2229 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2230 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2231 
2232 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2233 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2234 
2235 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2236 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2237 
2238 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2239 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
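/*
 * For reference, DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter") in the
 * table below expands to the following ctl_table initializer:
 *
 *	{
 *		.procname	= "rp_filter",
 *		.data		= ipv4_devconf.data +
 *				  IPV4_DEVCONF_RP_FILTER - 1,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= devinet_conf_proc,
 *		.extra1		= &ipv4_devconf,
 *	}
 *
 * __devinet_sysctl_register() later rebases .data and .extra1 onto the
 * per-device or per-namespace ipv4_devconf copy being registered.
 */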
2240 
2241 static struct devinet_sysctl_table {
2242 	struct ctl_table_header *sysctl_header;
2243 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2244 } devinet_sysctl = {
2245 	.devinet_vars = {
2246 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2247 					     devinet_sysctl_forward),
2248 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2249 
2250 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2251 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2252 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2253 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2254 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2255 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2256 					"accept_source_route"),
2257 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2258 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2259 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2260 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2261 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2262 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2263 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2264 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2265 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2266 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2267 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2268 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2269 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2270 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2271 					"force_igmp_version"),
2272 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2273 					"igmpv2_unsolicited_report_interval"),
2274 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2275 					"igmpv3_unsolicited_report_interval"),
2276 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2277 					"ignore_routes_with_linkdown"),
2278 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2279 					"drop_gratuitous_arp"),
2280 
2281 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2282 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2283 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2284 					      "promote_secondaries"),
2285 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2286 					      "route_localnet"),
2287 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2288 					      "drop_unicast_in_l2_multicast"),
2289 	},
2290 };
2291 
2292 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2293 				     int ifindex, struct ipv4_devconf *p)
2294 {
2295 	int i;
2296 	struct devinet_sysctl_table *t;
2297 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2298 
2299 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2300 	if (!t)
2301 		goto out;
2302 
2303 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2304 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2305 		t->devinet_vars[i].extra1 = p;
2306 		t->devinet_vars[i].extra2 = net;
2307 	}
2308 
2309 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2310 
2311 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2312 	if (!t->sysctl_header)
2313 		goto free;
2314 
2315 	p->sysctl = t;
2316 
2317 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2318 				    ifindex, p);
2319 	return 0;
2320 
2321 free:
2322 	kfree(t);
2323 out:
2324 	return -ENOBUFS;
2325 }
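/*
 * The registered table appears under /proc/sys/net/ipv4/conf/<dev_name>/.
 * The loop above rebases each .data pointer from the static template
 * onto the given ipv4_devconf, so "all", "default" and every device get
 * their own backing storage, and the final NETCONFA_ALL notification
 * lets netlink listeners learn the initial values.
 */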
2326 
2327 static void __devinet_sysctl_unregister(struct net *net,
2328 					struct ipv4_devconf *cnf, int ifindex)
2329 {
2330 	struct devinet_sysctl_table *t = cnf->sysctl;
2331 
2332 	if (t) {
2333 		cnf->sysctl = NULL;
2334 		unregister_net_sysctl_table(t->sysctl_header);
2335 		kfree(t);
2336 	}
2337 
2338 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2339 }
2340 
2341 static int devinet_sysctl_register(struct in_device *idev)
2342 {
2343 	int err;
2344 
2345 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2346 		return -EINVAL;
2347 
2348 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2349 	if (err)
2350 		return err;
2351 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2352 					idev->dev->ifindex, &idev->cnf);
2353 	if (err)
2354 		neigh_sysctl_unregister(idev->arp_parms);
2355 	return err;
2356 }
2357 
2358 static void devinet_sysctl_unregister(struct in_device *idev)
2359 {
2360 	struct net *net = dev_net(idev->dev);
2361 
2362 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2363 	neigh_sysctl_unregister(idev->arp_parms);
2364 }
2365 
2366 static struct ctl_table ctl_forward_entry[] = {
2367 	{
2368 		.procname	= "ip_forward",
2369 		.data		= &ipv4_devconf.data[
2370 					IPV4_DEVCONF_FORWARDING - 1],
2371 		.maxlen		= sizeof(int),
2372 		.mode		= 0644,
2373 		.proc_handler	= devinet_sysctl_forward,
2374 		.extra1		= &ipv4_devconf,
2375 		.extra2		= &init_net,
2376 	},
2377 	{ },
2378 };
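/*
 * ctl_forward_entry above backs /proc/sys/net/ipv4/ip_forward.  It
 * shares its handler with conf/all/forwarding and, for init_net, also
 * its storage; other namespaces get a kmemdup()ed copy pointed at their
 * own devconf_all in devinet_init_net() below.
 */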
2379 #endif
2380 
2381 static __net_init int devinet_init_net(struct net *net)
2382 {
2383 	int err;
2384 	struct ipv4_devconf *all, *dflt;
2385 #ifdef CONFIG_SYSCTL
2386 	struct ctl_table *tbl = ctl_forward_entry;
2387 	struct ctl_table_header *forw_hdr;
2388 #endif
2389 
2390 	err = -ENOMEM;
2391 	all = &ipv4_devconf;
2392 	dflt = &ipv4_devconf_dflt;
2393 
2394 	if (!net_eq(net, &init_net)) {
2395 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2396 		if (!all)
2397 			goto err_alloc_all;
2398 
2399 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2400 		if (!dflt)
2401 			goto err_alloc_dflt;
2402 
2403 #ifdef CONFIG_SYSCTL
2404 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2405 		if (!tbl)
2406 			goto err_alloc_ctl;
2407 
2408 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2409 		tbl[0].extra1 = all;
2410 		tbl[0].extra2 = net;
2411 #endif
2412 	}
2413 
2414 #ifdef CONFIG_SYSCTL
2415 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2416 	if (err < 0)
2417 		goto err_reg_all;
2418 
2419 	err = __devinet_sysctl_register(net, "default",
2420 					NETCONFA_IFINDEX_DEFAULT, dflt);
2421 	if (err < 0)
2422 		goto err_reg_dflt;
2423 
2424 	err = -ENOMEM;
2425 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2426 	if (!forw_hdr)
2427 		goto err_reg_ctl;
2428 	net->ipv4.forw_hdr = forw_hdr;
2429 #endif
2430 
2431 	net->ipv4.devconf_all = all;
2432 	net->ipv4.devconf_dflt = dflt;
2433 	return 0;
2434 
2435 #ifdef CONFIG_SYSCTL
2436 err_reg_ctl:
2437 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2438 err_reg_dflt:
2439 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2440 err_reg_all:
2441 	if (tbl != ctl_forward_entry)
2442 		kfree(tbl);
2443 err_alloc_ctl:
2444 #endif
2445 	if (dflt != &ipv4_devconf_dflt)
2446 		kfree(dflt);
2447 err_alloc_dflt:
2448 	if (all != &ipv4_devconf)
2449 		kfree(all);
2450 err_alloc_all:
2451 	return err;
2452 }
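/*
 * init_net keeps using the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry directly; every other namespace works on
 * kmemdup()ed copies, which is why the error path above only frees what
 * differs from the statics.
 */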
2453 
2454 static __net_exit void devinet_exit_net(struct net *net)
2455 {
2456 #ifdef CONFIG_SYSCTL
2457 	struct ctl_table *tbl;
2458 
2459 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2460 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2461 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2462 				    NETCONFA_IFINDEX_DEFAULT);
2463 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2464 				    NETCONFA_IFINDEX_ALL);
2465 	kfree(tbl);
2466 #endif
2467 	kfree(net->ipv4.devconf_dflt);
2468 	kfree(net->ipv4.devconf_all);
2469 }
2470 
2471 static __net_initdata struct pernet_operations devinet_ops = {
2472 	.init = devinet_init_net,
2473 	.exit = devinet_exit_net,
2474 };
2475 
2476 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2477 	.family		  = AF_INET,
2478 	.fill_link_af	  = inet_fill_link_af,
2479 	.get_link_af_size = inet_get_link_af_size,
2480 	.validate_link_af = inet_validate_link_af,
2481 	.set_link_af	  = inet_set_link_af,
2482 };
2483 
2484 void __init devinet_init(void)
2485 {
2486 	int i;
2487 
2488 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2489 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2490 
2491 	register_pernet_subsys(&devinet_ops);
2492 
2493 	register_gifconf(PF_INET, inet_gifconf);
2494 	register_netdevice_notifier(&ip_netdev_notifier);
2495 
2496 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2497 
2498 	rtnl_af_register(&inet_af_ops);
2499 
2500 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2501 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2502 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2503 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2504 		      inet_netconf_dump_devconf, 0);
2505 }
2506