xref: /openbmc/linux/net/ipv4/devinet.c (revision 2e6ae11dd0d1c37f44cec51a58fb2092e55ed0f5)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
/*
 * Read attribute @attr from the default devconf block of namespace @net.
 * @net is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) can be passed without precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
103 };
104 
/* Number of hash bits / buckets of the global local-address hash table. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries keyed by (namespace, ifa_local); see
 * inet_addr_hash().  Modified under RTNL, walked under RCU.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	ASSERT_RTNL();
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124 
/*
 * Unlink @ifa from the global address hash.  Caller must hold RTNL; the
 * RCU-aware removal keeps concurrent hlist_for_each_entry_rcu() walkers
 * safe.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
130 
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 
144 	rcu_read_lock();
145 	ifa = inet_lookup_ifaddr_rcu(net, addr);
146 	if (!ifa) {
147 		struct flowi4 fl4 = { .daddr = addr };
148 		struct fib_result res = { 0 };
149 		struct fib_table *local;
150 
151 		/* Fallback to FIB local table so that communication
152 		 * over loopback subnets work.
153 		 */
154 		local = fib_get_table(net, RT_TABLE_LOCAL);
155 		if (local &&
156 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
157 		    res.type == RTN_LOCAL)
158 			result = FIB_RES_DEV(res);
159 	} else {
160 		result = ifa->ifa_dev->dev;
161 	}
162 	if (result && devref)
163 		dev_hold(result);
164 	rcu_read_unlock();
165 	return result;
166 }
167 EXPORT_SYMBOL(__ip_dev_find);
168 
169 /* called under RCU lock */
170 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
171 {
172 	u32 hash = inet_addr_hash(net, addr);
173 	struct in_ifaddr *ifa;
174 
175 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
176 		if (ifa->ifa_local == addr &&
177 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
178 			return ifa;
179 
180 	return NULL;
181 }
182 
/* Forward declaration: sends an address netlink message for the given event. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN after an address
 * was added / removed; inetaddr_validator_chain is called before an
 * insertion is committed and may veto it (see __inet_insert_ifa()).
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL registration trivially succeeds and does nothing. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
201 
202 /* Locks all the inet devices. */
203 
204 static struct in_ifaddr *inet_alloc_ifa(void)
205 {
206 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
207 }
208 
209 static void inet_rcu_free_ifa(struct rcu_head *head)
210 {
211 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
212 	if (ifa->ifa_dev)
213 		in_dev_put(ifa->ifa_dev);
214 	kfree(ifa);
215 }
216 
/*
 * Free @ifa after an RCU grace period; the actual release (and the
 * in_device reference drop) happens in inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
221 
222 void in_dev_finish_destroy(struct in_device *idev)
223 {
224 	struct net_device *dev = idev->dev;
225 
226 	WARN_ON(idev->ifa_list);
227 	WARN_ON(idev->mc_list);
228 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
229 #ifdef NET_REFCNT_DEBUG
230 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
231 #endif
232 	dev_put(dev);
233 	if (!idev->dead)
234 		pr_err("Freeing alive in_device %p\n", idev);
235 	else
236 		kfree(idev);
237 }
238 EXPORT_SYMBOL(in_dev_finish_destroy);
239 
240 static struct in_device *inetdev_init(struct net_device *dev)
241 {
242 	struct in_device *in_dev;
243 	int err = -ENOMEM;
244 
245 	ASSERT_RTNL();
246 
247 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
248 	if (!in_dev)
249 		goto out;
250 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
251 			sizeof(in_dev->cnf));
252 	in_dev->cnf.sysctl = NULL;
253 	in_dev->dev = dev;
254 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
255 	if (!in_dev->arp_parms)
256 		goto out_kfree;
257 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
258 		dev_disable_lro(dev);
259 	/* Reference in_dev->dev */
260 	dev_hold(dev);
261 	/* Account for reference dev->ip_ptr (below) */
262 	refcount_set(&in_dev->refcnt, 1);
263 
264 	err = devinet_sysctl_register(in_dev);
265 	if (err) {
266 		in_dev->dead = 1;
267 		in_dev_put(in_dev);
268 		in_dev = NULL;
269 		goto out;
270 	}
271 	ip_mc_init_dev(in_dev);
272 	if (dev->flags & IFF_UP)
273 		ip_mc_up(in_dev);
274 
275 	/* we can receive as soon as ip_ptr is set -- do this last */
276 	rcu_assign_pointer(dev->ip_ptr, in_dev);
277 out:
278 	return in_dev ?: ERR_PTR(err);
279 out_kfree:
280 	kfree(in_dev);
281 	in_dev = NULL;
282 	goto out;
283 }
284 
285 static void in_dev_rcu_put(struct rcu_head *head)
286 {
287 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
288 	in_dev_put(idev);
289 }
290 
291 static void inetdev_destroy(struct in_device *in_dev)
292 {
293 	struct in_ifaddr *ifa;
294 	struct net_device *dev;
295 
296 	ASSERT_RTNL();
297 
298 	dev = in_dev->dev;
299 
300 	in_dev->dead = 1;
301 
302 	ip_mc_destroy_dev(in_dev);
303 
304 	while ((ifa = in_dev->ifa_list) != NULL) {
305 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
306 		inet_free_ifa(ifa);
307 	}
308 
309 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
310 
311 	devinet_sysctl_unregister(in_dev);
312 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
313 	arp_ifdown(dev);
314 
315 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
316 }
317 
318 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
319 {
320 	rcu_read_lock();
321 	for_primary_ifa(in_dev) {
322 		if (inet_ifa_match(a, ifa)) {
323 			if (!b || inet_ifa_match(b, ifa)) {
324 				rcu_read_unlock();
325 				return 1;
326 			}
327 		}
328 	} endfor_ifa(in_dev);
329 	rcu_read_unlock();
330 	return 0;
331 }
332 
/*
 * Remove the address that *@ifap points to from @in_dev.
 *
 * @in_dev:  device owning the address
 * @ifap:    link (list head or a predecessor's ifa_next) pointing at the
 *           address to remove
 * @destroy: when non-zero the removed address is freed with inet_free_ifa()
 * @nlh:     netlink request header forwarded to rtmsg_ifa()
 * @portid:  netlink port id forwarded to rtmsg_ifa()
 *
 * When a primary address is removed, the secondaries of the same subnet
 * are either removed as well or, if secondary promotion is enabled on the
 * device, the first of them is promoted to primary.  Nothing of that
 * happens when the device is already marked dead.
 *
 * Caller must hold RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* remember where a promoted address would be re-linked */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* skip primaries and addresses of other subnets */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* first matching secondary becomes the new primary */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from the subnet are changing
	 * the primary IP, so we must remove all their routes silently
	 * and later add them back with the new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* saved before promote is possibly moved within the list */
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* move promote from behind prev_prom to behind last_prim */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* re-add the routes of the remaining secondaries of the subnet */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
436 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that are not
 * handling a netlink request (no nlmsghdr, port id 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
442 
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime() to expire/deprecate addresses. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
446 
/*
 * Insert @ifa into the address list of ifa->ifa_dev.
 *
 * @ifa:    fully initialised address; ownership passes to this function
 * @nlh:    netlink request header forwarded to rtmsg_ifa()
 * @portid: netlink port id forwarded to rtmsg_ifa()
 * @extack: extended ack handed to the address validators
 *
 * The address becomes a secondary when an existing address covers the same
 * subnet, otherwise a primary.  On every failure path, and when ifa_local
 * is zero, @ifa is freed here, so the caller must not touch it afterwards.
 *
 * Caller must hold RTNL.
 *
 * Return: 0 on success (or when ifa_local is zero), -EEXIST if the same
 * local address already exists in that subnet, -EINVAL on a scope mismatch
 * with an existing address of the subnet, or the error returned by a
 * validator.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid, struct netlink_ext_ack *extack)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;
	struct in_validator_info ivi;
	int ret;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	/* After this loop ifap points at the terminating link of the list. */
	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* Allow any devices that wish to register ifaddr validators to weigh
	 * in now, before changes are committed.  The rtnl lock is serializing
	 * access here, so the state should not change between a validator call
	 * and a final notify on commit.  This isn't invoked on promotion under
	 * the assumption that validators are checking the address itself, and
	 * not the flags.
	 */
	ivi.ivi_addr = ifa->ifa_address;
	ivi.ivi_dev = ifa->ifa_dev;
	ivi.extack = extack;
	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
					   NETDEV_UP, &ivi);
	ret = notifier_to_errno(ret);
	if (ret) {
		inet_free_ifa(ifa);
		return ret;
	}

	/* Primaries go after the last suitable primary, secondaries at the end. */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Re-run the lifetime check right away so the new address is considered. */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
523 
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that are not
 * handling a netlink request (no nlmsghdr, port id 0, no extack).
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
528 
529 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
530 {
531 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
532 
533 	ASSERT_RTNL();
534 
535 	if (!in_dev) {
536 		inet_free_ifa(ifa);
537 		return -ENOBUFS;
538 	}
539 	ipv4_devconf_setall(in_dev);
540 	neigh_parms_data_state_setall(in_dev->arp_parms);
541 	if (ifa->ifa_dev != in_dev) {
542 		WARN_ON(ifa->ifa_dev);
543 		in_dev_hold(in_dev);
544 		ifa->ifa_dev = in_dev;
545 	}
546 	if (ipv4_is_loopback(ifa->ifa_local))
547 		ifa->ifa_scope = RT_SCOPE_HOST;
548 	return inet_insert_ifa(ifa);
549 }
550 
551 /* Caller must hold RCU or RTNL :
552  * We dont take a reference on found in_device
553  */
554 struct in_device *inetdev_by_index(struct net *net, int ifindex)
555 {
556 	struct net_device *dev;
557 	struct in_device *in_dev = NULL;
558 
559 	rcu_read_lock();
560 	dev = dev_get_by_index_rcu(net, ifindex);
561 	if (dev)
562 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
563 	rcu_read_unlock();
564 	return in_dev;
565 }
566 EXPORT_SYMBOL(inetdev_by_index);
567 
568 /* Called only from RTNL semaphored context. No locks. */
569 
570 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
571 				    __be32 mask)
572 {
573 	ASSERT_RTNL();
574 
575 	for_primary_ifa(in_dev) {
576 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
577 			return ifa;
578 	} endfor_ifa(in_dev);
579 	return NULL;
580 }
581 
582 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
583 {
584 	struct ip_mreqn mreq = {
585 		.imr_multiaddr.s_addr = ifa->ifa_address,
586 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
587 	};
588 	int ret;
589 
590 	ASSERT_RTNL();
591 
592 	lock_sock(sk);
593 	if (join)
594 		ret = ip_mc_join_group(sk, &mreq);
595 	else
596 		ret = ip_mc_leave_group(sk, &mreq);
597 	release_sock(sk);
598 
599 	return ret;
600 }
601 
602 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
603 			    struct netlink_ext_ack *extack)
604 {
605 	struct net *net = sock_net(skb->sk);
606 	struct nlattr *tb[IFA_MAX+1];
607 	struct in_device *in_dev;
608 	struct ifaddrmsg *ifm;
609 	struct in_ifaddr *ifa, **ifap;
610 	int err = -EINVAL;
611 
612 	ASSERT_RTNL();
613 
614 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
615 			  extack);
616 	if (err < 0)
617 		goto errout;
618 
619 	ifm = nlmsg_data(nlh);
620 	in_dev = inetdev_by_index(net, ifm->ifa_index);
621 	if (!in_dev) {
622 		err = -ENODEV;
623 		goto errout;
624 	}
625 
626 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
627 	     ifap = &ifa->ifa_next) {
628 		if (tb[IFA_LOCAL] &&
629 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
630 			continue;
631 
632 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
633 			continue;
634 
635 		if (tb[IFA_ADDRESS] &&
636 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
637 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
638 			continue;
639 
640 		if (ipv4_is_multicast(ifa->ifa_address))
641 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
642 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
643 		return 0;
644 	}
645 
646 	err = -EADDRNOTAVAIL;
647 errout:
648 	return err;
649 }
650 
/* Lifetime value meaning "never expires" for valid/preferred lifetimes. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
652 
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every bucket of inet_addr_lst[] is scanned twice: first under the RCU
 * read lock only, to find out whether anything has to change and to
 * compute the next wake-up time; then, only if a change is needed, under
 * RTNL to delete addresses whose valid lifetime has elapsed and to flag
 * addresses whose preferred lifetime has elapsed as IFA_F_DEPRECATED.
 * Addresses with IFA_F_PERMANENT are ignored.  The work re-queues itself
 * at the end.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: lockless scan, no modifications. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* expired: must be deleted */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* no longer preferred: wake up again when it expires */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* still preferred: wake up when that ends */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply the changes under RTNL. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* find the list link pointing at ifa; inet_del_ifa() needs it */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
750 
751 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
752 			     __u32 prefered_lft)
753 {
754 	unsigned long timeout;
755 
756 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
757 
758 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
759 	if (addrconf_finite_timeout(timeout))
760 		ifa->ifa_valid_lft = timeout;
761 	else
762 		ifa->ifa_flags |= IFA_F_PERMANENT;
763 
764 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
765 	if (addrconf_finite_timeout(timeout)) {
766 		if (timeout == 0)
767 			ifa->ifa_flags |= IFA_F_DEPRECATED;
768 		ifa->ifa_preferred_lft = timeout;
769 	}
770 	ifa->ifa_tstamp = jiffies;
771 	if (!ifa->ifa_cstamp)
772 		ifa->ifa_cstamp = ifa->ifa_tstamp;
773 }
774 
/*
 * Build a new in_ifaddr from an RTM_NEWADDR netlink message.
 *
 * @net:           namespace in which the interface index is resolved
 * @nlh:           the netlink message
 * @pvalid_lft:    set to the requested valid lifetime if IFA_CACHEINFO is
 *                 present, otherwise left untouched
 * @pprefered_lft: same for the preferred lifetime
 *
 * The returned address holds a reference on the interface's in_device but
 * is not yet linked into any list.
 *
 * Return: the new address, or ERR_PTR() with a parse error, -EINVAL (bad
 * prefix length, missing IFA_LOCAL, inconsistent lifetimes), -ENODEV (no
 * such interface) or -ENOBUFS (no in_device, or allocation failure).
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  NULL);
	if (err < 0)
		goto errout;

	/* From here on err is staged before each check that may bail out. */
	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* reference owned by ifa->ifa_dev, dropped in inet_rcu_free_ifa() */
	in_dev_hold(in_dev);

	/* Without an explicit IFA_ADDRESS it defaults to the local address. */
	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* the 32-bit IFA_FLAGS attribute takes precedence over the header flags */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	/* label defaults to the device name */
	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_RT_PRIORITY])
		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* valid lifetime must be non-zero and not shorter than preferred */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
861 
862 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
863 {
864 	struct in_device *in_dev = ifa->ifa_dev;
865 	struct in_ifaddr *ifa1, **ifap;
866 
867 	if (!ifa->ifa_local)
868 		return NULL;
869 
870 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
871 	     ifap = &ifa1->ifa_next) {
872 		if (ifa1->ifa_mask == ifa->ifa_mask &&
873 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
874 		    ifa1->ifa_local == ifa->ifa_local)
875 			return ifa1;
876 	}
877 	return NULL;
878 }
879 
880 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
881 			    struct netlink_ext_ack *extack)
882 {
883 	struct net *net = sock_net(skb->sk);
884 	struct in_ifaddr *ifa;
885 	struct in_ifaddr *ifa_existing;
886 	__u32 valid_lft = INFINITY_LIFE_TIME;
887 	__u32 prefered_lft = INFINITY_LIFE_TIME;
888 
889 	ASSERT_RTNL();
890 
891 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
892 	if (IS_ERR(ifa))
893 		return PTR_ERR(ifa);
894 
895 	ifa_existing = find_matching_ifa(ifa);
896 	if (!ifa_existing) {
897 		/* It would be best to check for !NLM_F_CREATE here but
898 		 * userspace already relies on not having to provide this.
899 		 */
900 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
901 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
902 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
903 					       true, ifa);
904 
905 			if (ret < 0) {
906 				inet_free_ifa(ifa);
907 				return ret;
908 			}
909 		}
910 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
911 					 extack);
912 	} else {
913 		u32 new_metric = ifa->ifa_rt_priority;
914 
915 		inet_free_ifa(ifa);
916 
917 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
918 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
919 			return -EEXIST;
920 		ifa = ifa_existing;
921 
922 		if (ifa->ifa_rt_priority != new_metric) {
923 			fib_modify_prefix_metric(ifa, new_metric);
924 			ifa->ifa_rt_priority = new_metric;
925 		}
926 
927 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
928 		cancel_delayed_work(&check_lifetime_work);
929 		queue_delayed_work(system_power_efficient_wq,
930 				&check_lifetime_work, 0);
931 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
932 	}
933 	return 0;
934 }
935 
936 /*
937  *	Determine a default network mask, based on the IP address.
938  */
939 
940 static int inet_abc_len(__be32 addr)
941 {
942 	int rc = -1;	/* Something else, probably a multicast. */
943 
944 	if (ipv4_is_zeronet(addr))
945 		rc = 0;
946 	else {
947 		__u32 haddr = ntohl(addr);
948 
949 		if (IN_CLASSA(haddr))
950 			rc = 8;
951 		else if (IN_CLASSB(haddr))
952 			rc = 16;
953 		else if (IN_CLASSC(haddr))
954 			rc = 24;
955 	}
956 
957 	return rc;
958 }
959 
960 
961 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
962 {
963 	struct sockaddr_in sin_orig;
964 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
965 	struct in_device *in_dev;
966 	struct in_ifaddr **ifap = NULL;
967 	struct in_ifaddr *ifa = NULL;
968 	struct net_device *dev;
969 	char *colon;
970 	int ret = -EFAULT;
971 	int tryaddrmatch = 0;
972 
973 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
974 
975 	/* save original address for comparison */
976 	memcpy(&sin_orig, sin, sizeof(*sin));
977 
978 	colon = strchr(ifr->ifr_name, ':');
979 	if (colon)
980 		*colon = 0;
981 
982 	dev_load(net, ifr->ifr_name);
983 
984 	switch (cmd) {
985 	case SIOCGIFADDR:	/* Get interface address */
986 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
987 	case SIOCGIFDSTADDR:	/* Get the destination address */
988 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
989 		/* Note that these ioctls will not sleep,
990 		   so that we do not impose a lock.
991 		   One day we will be forced to put shlock here (I mean SMP)
992 		 */
993 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
994 		memset(sin, 0, sizeof(*sin));
995 		sin->sin_family = AF_INET;
996 		break;
997 
998 	case SIOCSIFFLAGS:
999 		ret = -EPERM;
1000 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1001 			goto out;
1002 		break;
1003 	case SIOCSIFADDR:	/* Set interface address (and family) */
1004 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1005 	case SIOCSIFDSTADDR:	/* Set the destination address */
1006 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1007 		ret = -EPERM;
1008 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009 			goto out;
1010 		ret = -EINVAL;
1011 		if (sin->sin_family != AF_INET)
1012 			goto out;
1013 		break;
1014 	default:
1015 		ret = -EINVAL;
1016 		goto out;
1017 	}
1018 
1019 	rtnl_lock();
1020 
1021 	ret = -ENODEV;
1022 	dev = __dev_get_by_name(net, ifr->ifr_name);
1023 	if (!dev)
1024 		goto done;
1025 
1026 	if (colon)
1027 		*colon = ':';
1028 
1029 	in_dev = __in_dev_get_rtnl(dev);
1030 	if (in_dev) {
1031 		if (tryaddrmatch) {
1032 			/* Matthias Andree */
1033 			/* compare label and address (4.4BSD style) */
1034 			/* note: we only do this for a limited set of ioctls
1035 			   and only if the original address family was AF_INET.
1036 			   This is checked above. */
1037 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038 			     ifap = &ifa->ifa_next) {
1039 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1040 				    sin_orig.sin_addr.s_addr ==
1041 							ifa->ifa_local) {
1042 					break; /* found */
1043 				}
1044 			}
1045 		}
1046 		/* we didn't get a match, maybe the application is
1047 		   4.3BSD-style and passed in junk so we fall back to
1048 		   comparing just the label */
1049 		if (!ifa) {
1050 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1051 			     ifap = &ifa->ifa_next)
1052 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1053 					break;
1054 		}
1055 	}
1056 
1057 	ret = -EADDRNOTAVAIL;
1058 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1059 		goto done;
1060 
1061 	switch (cmd) {
1062 	case SIOCGIFADDR:	/* Get interface address */
1063 		ret = 0;
1064 		sin->sin_addr.s_addr = ifa->ifa_local;
1065 		break;
1066 
1067 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1068 		ret = 0;
1069 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1070 		break;
1071 
1072 	case SIOCGIFDSTADDR:	/* Get the destination address */
1073 		ret = 0;
1074 		sin->sin_addr.s_addr = ifa->ifa_address;
1075 		break;
1076 
1077 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1078 		ret = 0;
1079 		sin->sin_addr.s_addr = ifa->ifa_mask;
1080 		break;
1081 
1082 	case SIOCSIFFLAGS:
1083 		if (colon) {
1084 			ret = -EADDRNOTAVAIL;
1085 			if (!ifa)
1086 				break;
1087 			ret = 0;
1088 			if (!(ifr->ifr_flags & IFF_UP))
1089 				inet_del_ifa(in_dev, ifap, 1);
1090 			break;
1091 		}
1092 		ret = dev_change_flags(dev, ifr->ifr_flags);
1093 		break;
1094 
1095 	case SIOCSIFADDR:	/* Set interface address (and family) */
1096 		ret = -EINVAL;
1097 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1098 			break;
1099 
1100 		if (!ifa) {
1101 			ret = -ENOBUFS;
1102 			ifa = inet_alloc_ifa();
1103 			if (!ifa)
1104 				break;
1105 			INIT_HLIST_NODE(&ifa->hash);
1106 			if (colon)
1107 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1108 			else
1109 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1110 		} else {
1111 			ret = 0;
1112 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1113 				break;
1114 			inet_del_ifa(in_dev, ifap, 0);
1115 			ifa->ifa_broadcast = 0;
1116 			ifa->ifa_scope = 0;
1117 		}
1118 
1119 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1120 
1121 		if (!(dev->flags & IFF_POINTOPOINT)) {
1122 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1123 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1124 			if ((dev->flags & IFF_BROADCAST) &&
1125 			    ifa->ifa_prefixlen < 31)
1126 				ifa->ifa_broadcast = ifa->ifa_address |
1127 						     ~ifa->ifa_mask;
1128 		} else {
1129 			ifa->ifa_prefixlen = 32;
1130 			ifa->ifa_mask = inet_make_mask(32);
1131 		}
1132 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1133 		ret = inet_set_ifa(dev, ifa);
1134 		break;
1135 
1136 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1137 		ret = 0;
1138 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1139 			inet_del_ifa(in_dev, ifap, 0);
1140 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1141 			inet_insert_ifa(ifa);
1142 		}
1143 		break;
1144 
1145 	case SIOCSIFDSTADDR:	/* Set the destination address */
1146 		ret = 0;
1147 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1148 			break;
1149 		ret = -EINVAL;
1150 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1151 			break;
1152 		ret = 0;
1153 		inet_del_ifa(in_dev, ifap, 0);
1154 		ifa->ifa_address = sin->sin_addr.s_addr;
1155 		inet_insert_ifa(ifa);
1156 		break;
1157 
1158 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1159 
1160 		/*
1161 		 *	The mask we set must be legal.
1162 		 */
1163 		ret = -EINVAL;
1164 		if (bad_mask(sin->sin_addr.s_addr, 0))
1165 			break;
1166 		ret = 0;
1167 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1168 			__be32 old_mask = ifa->ifa_mask;
1169 			inet_del_ifa(in_dev, ifap, 0);
1170 			ifa->ifa_mask = sin->sin_addr.s_addr;
1171 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1172 
1173 			/* See if current broadcast address matches
1174 			 * with current netmask, then recalculate
1175 			 * the broadcast address. Otherwise it's a
1176 			 * funny address, so don't touch it since
1177 			 * the user seems to know what (s)he's doing...
1178 			 */
1179 			if ((dev->flags & IFF_BROADCAST) &&
1180 			    (ifa->ifa_prefixlen < 31) &&
1181 			    (ifa->ifa_broadcast ==
1182 			     (ifa->ifa_local|~old_mask))) {
1183 				ifa->ifa_broadcast = (ifa->ifa_local |
1184 						      ~sin->sin_addr.s_addr);
1185 			}
1186 			inet_insert_ifa(ifa);
1187 		}
1188 		break;
1189 	}
1190 done:
1191 	rtnl_unlock();
1192 out:
1193 	return ret;
1194 }
1195 
1196 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1197 {
1198 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1199 	struct in_ifaddr *ifa;
1200 	struct ifreq ifr;
1201 	int done = 0;
1202 
1203 	if (WARN_ON(size > sizeof(struct ifreq)))
1204 		goto out;
1205 
1206 	if (!in_dev)
1207 		goto out;
1208 
1209 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1210 		if (!buf) {
1211 			done += size;
1212 			continue;
1213 		}
1214 		if (len < size)
1215 			break;
1216 		memset(&ifr, 0, sizeof(struct ifreq));
1217 		strcpy(ifr.ifr_name, ifa->ifa_label);
1218 
1219 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1220 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1221 								ifa->ifa_local;
1222 
1223 		if (copy_to_user(buf + done, &ifr, size)) {
1224 			done = -EFAULT;
1225 			break;
1226 		}
1227 		len  -= size;
1228 		done += size;
1229 	}
1230 out:
1231 	return done;
1232 }
1233 
1234 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1235 				 int scope)
1236 {
1237 	for_primary_ifa(in_dev) {
1238 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1239 		    ifa->ifa_scope <= scope)
1240 			return ifa->ifa_local;
1241 	} endfor_ifa(in_dev);
1242 
1243 	return 0;
1244 }
1245 
1246 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1247 {
1248 	__be32 addr = 0;
1249 	struct in_device *in_dev;
1250 	struct net *net = dev_net(dev);
1251 	int master_idx;
1252 
1253 	rcu_read_lock();
1254 	in_dev = __in_dev_get_rcu(dev);
1255 	if (!in_dev)
1256 		goto no_in_dev;
1257 
1258 	for_primary_ifa(in_dev) {
1259 		if (ifa->ifa_scope > scope)
1260 			continue;
1261 		if (!dst || inet_ifa_match(dst, ifa)) {
1262 			addr = ifa->ifa_local;
1263 			break;
1264 		}
1265 		if (!addr)
1266 			addr = ifa->ifa_local;
1267 	} endfor_ifa(in_dev);
1268 
1269 	if (addr)
1270 		goto out_unlock;
1271 no_in_dev:
1272 	master_idx = l3mdev_master_ifindex_rcu(dev);
1273 
1274 	/* For VRFs, the VRF device takes the place of the loopback device,
1275 	 * with addresses on it being preferred.  Note in such cases the
1276 	 * loopback device will be among the devices that fail the master_idx
1277 	 * equality check in the loop below.
1278 	 */
1279 	if (master_idx &&
1280 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1281 	    (in_dev = __in_dev_get_rcu(dev))) {
1282 		addr = in_dev_select_addr(in_dev, scope);
1283 		if (addr)
1284 			goto out_unlock;
1285 	}
1286 
1287 	/* Not loopback addresses on loopback should be preferred
1288 	   in this case. It is important that lo is the first interface
1289 	   in dev_base list.
1290 	 */
1291 	for_each_netdev_rcu(net, dev) {
1292 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1293 			continue;
1294 
1295 		in_dev = __in_dev_get_rcu(dev);
1296 		if (!in_dev)
1297 			continue;
1298 
1299 		addr = in_dev_select_addr(in_dev, scope);
1300 		if (addr)
1301 			goto out_unlock;
1302 	}
1303 out_unlock:
1304 	rcu_read_unlock();
1305 	return addr;
1306 }
1307 EXPORT_SYMBOL(inet_select_addr);
1308 
/*
 * Look for a usable local address on a single in_device.
 *
 * Two pieces of state are tracked while walking every address of @in_dev:
 *   addr - first address whose ifa_local equals @local (any address when
 *          @local is 0) and whose scope does not exceed @scope;
 *   same - set once an address is found for which both @local and @dst
 *          (each skipped when 0) pass inet_ifa_match().
 *
 * Returns addr when the walk ended with same set, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Candidate source address: first one satisfying local/scope. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* A matching entry was already seen; nothing left to do. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit local, or no dst constraint: done. */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1345 
1346 /*
1347  * Confirm that local IP address exists using wildcards:
1348  * - net: netns to check, cannot be NULL
1349  * - in_dev: only on this interface, NULL=any interface
1350  * - dst: only in the same subnet as dst, 0=any dst
1351  * - local: address, 0=autoselect the local address
1352  * - scope: maximum allowed scope value for the local address
1353  */
1354 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1355 			 __be32 dst, __be32 local, int scope)
1356 {
1357 	__be32 addr = 0;
1358 	struct net_device *dev;
1359 
1360 	if (in_dev)
1361 		return confirm_addr_indev(in_dev, dst, local, scope);
1362 
1363 	rcu_read_lock();
1364 	for_each_netdev_rcu(net, dev) {
1365 		in_dev = __in_dev_get_rcu(dev);
1366 		if (in_dev) {
1367 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1368 			if (addr)
1369 				break;
1370 		}
1371 	}
1372 	rcu_read_unlock();
1373 
1374 	return addr;
1375 }
1376 EXPORT_SYMBOL(inet_confirm_addr);
1377 
1378 /*
1379  *	Device notifier
1380  */
1381 
1382 int register_inetaddr_notifier(struct notifier_block *nb)
1383 {
1384 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1385 }
1386 EXPORT_SYMBOL(register_inetaddr_notifier);
1387 
1388 int unregister_inetaddr_notifier(struct notifier_block *nb)
1389 {
1390 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1391 }
1392 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1393 
1394 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1395 {
1396 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1399 
1400 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1401 {
1402 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1403 	    nb);
1404 }
1405 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1406 
1407 /* Rename ifa_labels for a device name change. Make some effort to preserve
1408  * existing alias numbering and to create unique labels if possible.
1409 */
1410 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1411 {
1412 	struct in_ifaddr *ifa;
1413 	int named = 0;
1414 
1415 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1416 		char old[IFNAMSIZ], *dot;
1417 
1418 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1419 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1420 		if (named++ == 0)
1421 			goto skip;
1422 		dot = strchr(old, ':');
1423 		if (!dot) {
1424 			sprintf(old, ":%d", named);
1425 			dot = old;
1426 		}
1427 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1428 			strcat(ifa->ifa_label, dot);
1429 		else
1430 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1431 skip:
1432 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1433 	}
1434 }
1435 
1436 static bool inetdev_valid_mtu(unsigned int mtu)
1437 {
1438 	return mtu >= IPV4_MIN_MTU;
1439 }
1440 
1441 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1442 					struct in_device *in_dev)
1443 
1444 {
1445 	struct in_ifaddr *ifa;
1446 
1447 	for (ifa = in_dev->ifa_list; ifa;
1448 	     ifa = ifa->ifa_next) {
1449 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1450 			 ifa->ifa_local, dev,
1451 			 ifa->ifa_local, NULL,
1452 			 dev->dev_addr, NULL);
1453 	}
1454 }
1455 
/* Netdevice notifier callback for the IPv4 per-device state.
 *
 * Called only under RTNL semaphore (asserted below).
 *
 * Returns NOTIFY_DONE, except when creating the in_device for a
 * NETDEV_REGISTER event fails, in which case the error is passed back
 * through notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only REGISTER and CHANGEMTU can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registering a device that already has an in_device. */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback gets INADDR_LOOPBACK/8 with host scope on UP. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR announce only if arp_notify is enabled. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
		 * comment above looks stale - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries follow the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1551 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1555 
1556 static size_t inet_nlmsg_size(void)
1557 {
1558 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1559 	       + nla_total_size(4) /* IFA_ADDRESS */
1560 	       + nla_total_size(4) /* IFA_LOCAL */
1561 	       + nla_total_size(4) /* IFA_BROADCAST */
1562 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1563 	       + nla_total_size(4)  /* IFA_FLAGS */
1564 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1565 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1566 }
1567 
1568 static inline u32 cstamp_delta(unsigned long cstamp)
1569 {
1570 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1571 }
1572 
1573 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1574 			 unsigned long tstamp, u32 preferred, u32 valid)
1575 {
1576 	struct ifa_cacheinfo ci;
1577 
1578 	ci.cstamp = cstamp_delta(cstamp);
1579 	ci.tstamp = cstamp_delta(tstamp);
1580 	ci.ifa_prefered = preferred;
1581 	ci.ifa_valid = valid;
1582 
1583 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1584 }
1585 
1586 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1587 			    u32 portid, u32 seq, int event, unsigned int flags)
1588 {
1589 	struct ifaddrmsg *ifm;
1590 	struct nlmsghdr  *nlh;
1591 	u32 preferred, valid;
1592 
1593 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1594 	if (!nlh)
1595 		return -EMSGSIZE;
1596 
1597 	ifm = nlmsg_data(nlh);
1598 	ifm->ifa_family = AF_INET;
1599 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1600 	ifm->ifa_flags = ifa->ifa_flags;
1601 	ifm->ifa_scope = ifa->ifa_scope;
1602 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1603 
1604 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1605 		preferred = ifa->ifa_preferred_lft;
1606 		valid = ifa->ifa_valid_lft;
1607 		if (preferred != INFINITY_LIFE_TIME) {
1608 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1609 
1610 			if (preferred > tval)
1611 				preferred -= tval;
1612 			else
1613 				preferred = 0;
1614 			if (valid != INFINITY_LIFE_TIME) {
1615 				if (valid > tval)
1616 					valid -= tval;
1617 				else
1618 					valid = 0;
1619 			}
1620 		}
1621 	} else {
1622 		preferred = INFINITY_LIFE_TIME;
1623 		valid = INFINITY_LIFE_TIME;
1624 	}
1625 	if ((ifa->ifa_address &&
1626 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1627 	    (ifa->ifa_local &&
1628 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1629 	    (ifa->ifa_broadcast &&
1630 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1631 	    (ifa->ifa_label[0] &&
1632 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1633 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1634 	    (ifa->ifa_rt_priority &&
1635 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1636 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1637 			  preferred, valid))
1638 		goto nla_put_failure;
1639 
1640 	nlmsg_end(skb, nlh);
1641 	return 0;
1642 
1643 nla_put_failure:
1644 	nlmsg_cancel(skb, nlh);
1645 	return -EMSGSIZE;
1646 }
1647 
/*
 * Netlink dump callback emitting an RTM_NEWADDR message for each IPv4
 * address in the namespace of the requesting socket.
 *
 * The dump is resumable; the cursor lives in cb->args[]:
 *   args[0] - device index hash bucket to continue from
 *   args[1] - position of the device within that bucket
 *   args[2] - position of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; reset afterwards. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start from its first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* On failure stop here; the cursor is saved at done. */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1704 
1705 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1706 		      u32 portid)
1707 {
1708 	struct sk_buff *skb;
1709 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1710 	int err = -ENOBUFS;
1711 	struct net *net;
1712 
1713 	net = dev_net(ifa->ifa_dev->dev);
1714 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1715 	if (!skb)
1716 		goto errout;
1717 
1718 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1719 	if (err < 0) {
1720 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1721 		WARN_ON(err == -EMSGSIZE);
1722 		kfree_skb(skb);
1723 		goto errout;
1724 	}
1725 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1726 	return;
1727 errout:
1728 	if (err < 0)
1729 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1730 }
1731 
1732 static size_t inet_get_link_af_size(const struct net_device *dev,
1733 				    u32 ext_filter_mask)
1734 {
1735 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1736 
1737 	if (!in_dev)
1738 		return 0;
1739 
1740 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1741 }
1742 
1743 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1744 			     u32 ext_filter_mask)
1745 {
1746 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1747 	struct nlattr *nla;
1748 	int i;
1749 
1750 	if (!in_dev)
1751 		return -ENODATA;
1752 
1753 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1754 	if (!nla)
1755 		return -EMSGSIZE;
1756 
1757 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1758 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1759 
1760 	return 0;
1761 }
1762 
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF is a
 * nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1766 
1767 static int inet_validate_link_af(const struct net_device *dev,
1768 				 const struct nlattr *nla)
1769 {
1770 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1771 	int err, rem;
1772 
1773 	if (dev && !__in_dev_get_rcu(dev))
1774 		return -EAFNOSUPPORT;
1775 
1776 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1777 	if (err < 0)
1778 		return err;
1779 
1780 	if (tb[IFLA_INET_CONF]) {
1781 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1782 			int cfgid = nla_type(a);
1783 
1784 			if (nla_len(a) < 4)
1785 				return -EINVAL;
1786 
1787 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1788 				return -EINVAL;
1789 		}
1790 	}
1791 
1792 	return 0;
1793 }
1794 
1795 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1796 {
1797 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1798 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1799 	int rem;
1800 
1801 	if (!in_dev)
1802 		return -EAFNOSUPPORT;
1803 
1804 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1805 		BUG();
1806 
1807 	if (tb[IFLA_INET_CONF]) {
1808 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1809 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1810 	}
1811 
1812 	return 0;
1813 }
1814 
1815 static int inet_netconf_msgsize_devconf(int type)
1816 {
1817 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1818 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1819 	bool all = false;
1820 
1821 	if (type == NETCONFA_ALL)
1822 		all = true;
1823 
1824 	if (all || type == NETCONFA_FORWARDING)
1825 		size += nla_total_size(4);
1826 	if (all || type == NETCONFA_RP_FILTER)
1827 		size += nla_total_size(4);
1828 	if (all || type == NETCONFA_MC_FORWARDING)
1829 		size += nla_total_size(4);
1830 	if (all || type == NETCONFA_BC_FORWARDING)
1831 		size += nla_total_size(4);
1832 	if (all || type == NETCONFA_PROXY_NEIGH)
1833 		size += nla_total_size(4);
1834 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1835 		size += nla_total_size(4);
1836 
1837 	return size;
1838 }
1839 
1840 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1841 				     struct ipv4_devconf *devconf, u32 portid,
1842 				     u32 seq, int event, unsigned int flags,
1843 				     int type)
1844 {
1845 	struct nlmsghdr  *nlh;
1846 	struct netconfmsg *ncm;
1847 	bool all = false;
1848 
1849 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1850 			flags);
1851 	if (!nlh)
1852 		return -EMSGSIZE;
1853 
1854 	if (type == NETCONFA_ALL)
1855 		all = true;
1856 
1857 	ncm = nlmsg_data(nlh);
1858 	ncm->ncm_family = AF_INET;
1859 
1860 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1861 		goto nla_put_failure;
1862 
1863 	if (!devconf)
1864 		goto out;
1865 
1866 	if ((all || type == NETCONFA_FORWARDING) &&
1867 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1868 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1869 		goto nla_put_failure;
1870 	if ((all || type == NETCONFA_RP_FILTER) &&
1871 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1872 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1873 		goto nla_put_failure;
1874 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1875 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1876 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1877 		goto nla_put_failure;
1878 	if ((all || type == NETCONFA_BC_FORWARDING) &&
1879 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1880 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1881 		goto nla_put_failure;
1882 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1883 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1884 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1885 		goto nla_put_failure;
1886 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1887 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1888 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1889 		goto nla_put_failure;
1890 
1891 out:
1892 	nlmsg_end(skb, nlh);
1893 	return 0;
1894 
1895 nla_put_failure:
1896 	nlmsg_cancel(skb, nlh);
1897 	return -EMSGSIZE;
1898 }
1899 
1900 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1901 				 int ifindex, struct ipv4_devconf *devconf)
1902 {
1903 	struct sk_buff *skb;
1904 	int err = -ENOBUFS;
1905 
1906 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1907 	if (!skb)
1908 		goto errout;
1909 
1910 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1911 					event, 0, type);
1912 	if (err < 0) {
1913 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1914 		WARN_ON(err == -EMSGSIZE);
1915 		kfree_skb(skb);
1916 		goto errout;
1917 	}
1918 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1919 	return;
1920 errout:
1921 	if (err < 0)
1922 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1923 }
1924 
/* Attribute policy used when parsing netconf requests; each listed
 * attribute has .len set to sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1932 
1933 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1934 				    struct nlmsghdr *nlh,
1935 				    struct netlink_ext_ack *extack)
1936 {
1937 	struct net *net = sock_net(in_skb->sk);
1938 	struct nlattr *tb[NETCONFA_MAX+1];
1939 	struct netconfmsg *ncm;
1940 	struct sk_buff *skb;
1941 	struct ipv4_devconf *devconf;
1942 	struct in_device *in_dev;
1943 	struct net_device *dev;
1944 	int ifindex;
1945 	int err;
1946 
1947 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1948 			  devconf_ipv4_policy, extack);
1949 	if (err < 0)
1950 		goto errout;
1951 
1952 	err = -EINVAL;
1953 	if (!tb[NETCONFA_IFINDEX])
1954 		goto errout;
1955 
1956 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1957 	switch (ifindex) {
1958 	case NETCONFA_IFINDEX_ALL:
1959 		devconf = net->ipv4.devconf_all;
1960 		break;
1961 	case NETCONFA_IFINDEX_DEFAULT:
1962 		devconf = net->ipv4.devconf_dflt;
1963 		break;
1964 	default:
1965 		dev = __dev_get_by_index(net, ifindex);
1966 		if (!dev)
1967 			goto errout;
1968 		in_dev = __in_dev_get_rtnl(dev);
1969 		if (!in_dev)
1970 			goto errout;
1971 		devconf = &in_dev->cnf;
1972 		break;
1973 	}
1974 
1975 	err = -ENOBUFS;
1976 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1977 	if (!skb)
1978 		goto errout;
1979 
1980 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1981 					NETLINK_CB(in_skb).portid,
1982 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1983 					NETCONFA_ALL);
1984 	if (err < 0) {
1985 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1986 		WARN_ON(err == -EMSGSIZE);
1987 		kfree_skb(skb);
1988 		goto errout;
1989 	}
1990 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1991 errout:
1992 	return err;
1993 }
1994 
/*
 * Netlink dump callback emitting an RTM_NEWNETCONF message for every
 * device with an in_device, followed by one for the "all" configuration
 * and one for the "default" configuration.
 *
 * The dump is resumable; the cursor lives in cb->args[]:
 *   args[0] - device index hash bucket to continue from; the values
 *             NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 stand for the
 *             pending "all" and "default" entries respectively
 *   args[1] - position of the device within that bucket
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	/* s_idx only applies to the first bucket visited; reset afterwards. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			/* On failure stop here; the cursor is saved at done. */
			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* All buckets done: emit the "all" entry, advancing h on success. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Then the "default" entry, again advancing h on success. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	/* Save the cursor for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2065 
2066 #ifdef CONFIG_SYSCTL
2067 
2068 static void devinet_copy_dflt_conf(struct net *net, int i)
2069 {
2070 	struct net_device *dev;
2071 
2072 	rcu_read_lock();
2073 	for_each_netdev_rcu(net, dev) {
2074 		struct in_device *in_dev;
2075 
2076 		in_dev = __in_dev_get_rcu(dev);
2077 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2078 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2079 	}
2080 	rcu_read_unlock();
2081 }
2082 
2083 /* called with RTNL locked */
2084 static void inet_forward_change(struct net *net)
2085 {
2086 	struct net_device *dev;
2087 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2088 
2089 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2090 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2091 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2092 				    NETCONFA_FORWARDING,
2093 				    NETCONFA_IFINDEX_ALL,
2094 				    net->ipv4.devconf_all);
2095 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2096 				    NETCONFA_FORWARDING,
2097 				    NETCONFA_IFINDEX_DEFAULT,
2098 				    net->ipv4.devconf_dflt);
2099 
2100 	for_each_netdev(net, dev) {
2101 		struct in_device *in_dev;
2102 
2103 		if (on)
2104 			dev_disable_lro(dev);
2105 
2106 		in_dev = __in_dev_get_rtnl(dev);
2107 		if (in_dev) {
2108 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2109 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2110 						    NETCONFA_FORWARDING,
2111 						    dev->ifindex, &in_dev->cnf);
2112 		}
2113 	}
2114 }
2115 
2116 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2117 {
2118 	if (cnf == net->ipv4.devconf_dflt)
2119 		return NETCONFA_IFINDEX_DEFAULT;
2120 	else if (cnf == net->ipv4.devconf_all)
2121 		return NETCONFA_IFINDEX_ALL;
2122 	else {
2123 		struct in_device *idev
2124 			= container_of(cnf, struct in_device, cnf);
2125 		return idev->dev->ifindex;
2126 	}
2127 }
2128 
2129 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2130 			     void __user *buffer,
2131 			     size_t *lenp, loff_t *ppos)
2132 {
2133 	int old_value = *(int *)ctl->data;
2134 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2135 	int new_value = *(int *)ctl->data;
2136 
2137 	if (write) {
2138 		struct ipv4_devconf *cnf = ctl->extra1;
2139 		struct net *net = ctl->extra2;
2140 		int i = (int *)ctl->data - cnf->data;
2141 		int ifindex;
2142 
2143 		set_bit(i, cnf->state);
2144 
2145 		if (cnf == net->ipv4.devconf_dflt)
2146 			devinet_copy_dflt_conf(net, i);
2147 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2148 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2149 			if ((new_value == 0) && (old_value != 0))
2150 				rt_cache_flush(net);
2151 
2152 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2153 		    new_value != old_value)
2154 			rt_cache_flush(net);
2155 
2156 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2157 		    new_value != old_value) {
2158 			ifindex = devinet_conf_ifindex(net, cnf);
2159 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2160 						    NETCONFA_RP_FILTER,
2161 						    ifindex, cnf);
2162 		}
2163 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2164 		    new_value != old_value) {
2165 			ifindex = devinet_conf_ifindex(net, cnf);
2166 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2167 						    NETCONFA_PROXY_NEIGH,
2168 						    ifindex, cnf);
2169 		}
2170 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2171 		    new_value != old_value) {
2172 			ifindex = devinet_conf_ifindex(net, cnf);
2173 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2174 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2175 						    ifindex, cnf);
2176 		}
2177 	}
2178 
2179 	return ret;
2180 }
2181 
2182 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2183 				  void __user *buffer,
2184 				  size_t *lenp, loff_t *ppos)
2185 {
2186 	int *valp = ctl->data;
2187 	int val = *valp;
2188 	loff_t pos = *ppos;
2189 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2190 
2191 	if (write && *valp != val) {
2192 		struct net *net = ctl->extra2;
2193 
2194 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2195 			if (!rtnl_trylock()) {
2196 				/* Restore the original values before restarting */
2197 				*valp = val;
2198 				*ppos = pos;
2199 				return restart_syscall();
2200 			}
2201 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2202 				inet_forward_change(net);
2203 			} else {
2204 				struct ipv4_devconf *cnf = ctl->extra1;
2205 				struct in_device *idev =
2206 					container_of(cnf, struct in_device, cnf);
2207 				if (*valp)
2208 					dev_disable_lro(idev->dev);
2209 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2210 							    NETCONFA_FORWARDING,
2211 							    idev->dev->ifindex,
2212 							    cnf);
2213 			}
2214 			rtnl_unlock();
2215 			rt_cache_flush(net);
2216 		} else
2217 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2218 						    NETCONFA_FORWARDING,
2219 						    NETCONFA_IFINDEX_DEFAULT,
2220 						    net->ipv4.devconf_dflt);
2221 	}
2222 
2223 	return ret;
2224 }
2225 
2226 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2227 				void __user *buffer,
2228 				size_t *lenp, loff_t *ppos)
2229 {
2230 	int *valp = ctl->data;
2231 	int val = *valp;
2232 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2233 	struct net *net = ctl->extra2;
2234 
2235 	if (write && *valp != val)
2236 		rt_cache_flush(net);
2237 
2238 	return ret;
2239 }
2240 
/*
 * Builders for the entries of the devconf sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY() produces one ctl_table initializer:
 *   field   - suffix of the IPV4_DEVCONF_* index; .data points at slot
 *             (IPV4_DEVCONF_<field> - 1) of ipv4_devconf.data
 *   fname   - file name of the entry (.procname)
 *   perms   - file mode
 *   handler - proc handler
 * .extra1 always refers to ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(field, fname, perms, handler) \
	{ \
		.procname	= fname, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## field - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perms, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(field, fname) \
	DEVINET_SYSCTL_ENTRY(field, fname, 0644, devinet_conf_proc)

/* Read-only (0444) entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(field, fname) \
	DEVINET_SYSCTL_ENTRY(field, fname, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(field, fname, handler) \
	DEVINET_SYSCTL_ENTRY(field, fname, 0644, handler)

/* Read-write (0644) entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(field, fname) \
	DEVINET_SYSCTL_ENTRY(field, fname, 0644, ipv4_doint_and_flush)
2263 
/*
 * Template for one devconf sysctl directory (net/ipv4/conf/<name>/).
 *
 * The template is never registered directly: __devinet_sysctl_register()
 * duplicates it with kmemdup(), shifts every entry's .data from
 * ipv4_devconf to the target ipv4_devconf block and fills in
 * .extra1/.extra2 before registering the copy.
 *
 * sysctl_header holds the handle returned by register_net_sysctl() for a
 * registered copy; devinet_vars has __IPV4_DEVCONF_MAX slots.
 */
2264 static struct devinet_sysctl_table {
2265 	struct ctl_table_header *sysctl_header;
2266 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2267 } devinet_sysctl = {
2268 	.devinet_vars = {
		/* "forwarding" has its own handler; "mc_forwarding" is 0444. */
2269 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2270 					     devinet_sysctl_forward),
2271 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2272 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2273 
		/* Plain read-write integers handled by devinet_conf_proc(). */
2274 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2275 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2276 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2277 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2278 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2279 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2280 					"accept_source_route"),
2281 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2282 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2283 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2284 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2285 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2286 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2287 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2288 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2289 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2290 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2291 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2292 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2293 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2294 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2295 					"force_igmp_version"),
2296 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2297 					"igmpv2_unsolicited_report_interval"),
2298 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2299 					"igmpv3_unsolicited_report_interval"),
2300 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2301 					"ignore_routes_with_linkdown"),
2302 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2303 					"drop_gratuitous_arp"),
2304 
		/*
		 * Entries served by ipv4_doint_and_flush(): a write that
		 * changes the value flushes the route cache.
		 */
2305 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2306 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2307 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2308 					      "promote_secondaries"),
2309 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2310 					      "route_localnet"),
2311 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2312 					      "drop_unicast_in_l2_multicast"),
2313 	},
2314 };
2315 
2316 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2317 				     int ifindex, struct ipv4_devconf *p)
2318 {
2319 	int i;
2320 	struct devinet_sysctl_table *t;
2321 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2322 
2323 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2324 	if (!t)
2325 		goto out;
2326 
2327 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2328 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2329 		t->devinet_vars[i].extra1 = p;
2330 		t->devinet_vars[i].extra2 = net;
2331 	}
2332 
2333 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2334 
2335 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2336 	if (!t->sysctl_header)
2337 		goto free;
2338 
2339 	p->sysctl = t;
2340 
2341 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2342 				    ifindex, p);
2343 	return 0;
2344 
2345 free:
2346 	kfree(t);
2347 out:
2348 	return -ENOBUFS;
2349 }
2350 
2351 static void __devinet_sysctl_unregister(struct net *net,
2352 					struct ipv4_devconf *cnf, int ifindex)
2353 {
2354 	struct devinet_sysctl_table *t = cnf->sysctl;
2355 
2356 	if (t) {
2357 		cnf->sysctl = NULL;
2358 		unregister_net_sysctl_table(t->sysctl_header);
2359 		kfree(t);
2360 	}
2361 
2362 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2363 }
2364 
2365 static int devinet_sysctl_register(struct in_device *idev)
2366 {
2367 	int err;
2368 
2369 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2370 		return -EINVAL;
2371 
2372 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2373 	if (err)
2374 		return err;
2375 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2376 					idev->dev->ifindex, &idev->cnf);
2377 	if (err)
2378 		neigh_sysctl_unregister(idev->arp_parms);
2379 	return err;
2380 }
2381 
2382 static void devinet_sysctl_unregister(struct in_device *idev)
2383 {
2384 	struct net *net = dev_net(idev->dev);
2385 
2386 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2387 	neigh_sysctl_unregister(idev->arp_parms);
2388 }
2389 
/*
 * Table holding the single "ip_forward" entry that devinet_init_net()
 * registers under "net/ipv4".
 *
 * As written it refers to the FORWARDING slot of ipv4_devconf and to
 * init_net.  For any other namespace devinet_init_net() duplicates the
 * table and repoints .data/.extra1/.extra2 at that namespace's "all"
 * devconf block.  Writes are handled by devinet_sysctl_forward().
 */
2390 static struct ctl_table ctl_forward_entry[] = {
2391 	{
2392 		.procname	= "ip_forward",
2393 		.data		= &ipv4_devconf.data[
2394 					IPV4_DEVCONF_FORWARDING - 1],
2395 		.maxlen		= sizeof(int),
2396 		.mode		= 0644,
2397 		.proc_handler	= devinet_sysctl_forward,
2398 		.extra1		= &ipv4_devconf,
2399 		.extra2		= &init_net,
2400 	},
	/* empty terminating entry */
2401 	{ },
2402 };
2403 #endif
2404 
2405 static __net_init int devinet_init_net(struct net *net)
2406 {
2407 	int err;
2408 	struct ipv4_devconf *all, *dflt;
2409 #ifdef CONFIG_SYSCTL
2410 	struct ctl_table *tbl = ctl_forward_entry;
2411 	struct ctl_table_header *forw_hdr;
2412 #endif
2413 
2414 	err = -ENOMEM;
2415 	all = &ipv4_devconf;
2416 	dflt = &ipv4_devconf_dflt;
2417 
2418 	if (!net_eq(net, &init_net)) {
2419 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2420 		if (!all)
2421 			goto err_alloc_all;
2422 
2423 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2424 		if (!dflt)
2425 			goto err_alloc_dflt;
2426 
2427 #ifdef CONFIG_SYSCTL
2428 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2429 		if (!tbl)
2430 			goto err_alloc_ctl;
2431 
2432 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2433 		tbl[0].extra1 = all;
2434 		tbl[0].extra2 = net;
2435 #endif
2436 	}
2437 
2438 #ifdef CONFIG_SYSCTL
2439 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2440 	if (err < 0)
2441 		goto err_reg_all;
2442 
2443 	err = __devinet_sysctl_register(net, "default",
2444 					NETCONFA_IFINDEX_DEFAULT, dflt);
2445 	if (err < 0)
2446 		goto err_reg_dflt;
2447 
2448 	err = -ENOMEM;
2449 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2450 	if (!forw_hdr)
2451 		goto err_reg_ctl;
2452 	net->ipv4.forw_hdr = forw_hdr;
2453 #endif
2454 
2455 	net->ipv4.devconf_all = all;
2456 	net->ipv4.devconf_dflt = dflt;
2457 	return 0;
2458 
2459 #ifdef CONFIG_SYSCTL
2460 err_reg_ctl:
2461 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2462 err_reg_dflt:
2463 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2464 err_reg_all:
2465 	if (tbl != ctl_forward_entry)
2466 		kfree(tbl);
2467 err_alloc_ctl:
2468 #endif
2469 	if (dflt != &ipv4_devconf_dflt)
2470 		kfree(dflt);
2471 err_alloc_dflt:
2472 	if (all != &ipv4_devconf)
2473 		kfree(all);
2474 err_alloc_all:
2475 	return err;
2476 }
2477 
2478 static __net_exit void devinet_exit_net(struct net *net)
2479 {
2480 #ifdef CONFIG_SYSCTL
2481 	struct ctl_table *tbl;
2482 
2483 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2484 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2485 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2486 				    NETCONFA_IFINDEX_DEFAULT);
2487 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2488 				    NETCONFA_IFINDEX_ALL);
2489 	kfree(tbl);
2490 #endif
2491 	kfree(net->ipv4.devconf_dflt);
2492 	kfree(net->ipv4.devconf_all);
2493 }
2494 
/*
 * Per-network-namespace hooks for this file; handed to
 * register_pernet_subsys() by devinet_init().
 */
2495 static __net_initdata struct pernet_operations devinet_ops = {
2496 	.init = devinet_init_net,
2497 	.exit = devinet_exit_net,
2498 };
2499 
/*
 * AF_INET link address-family callbacks; handed to rtnl_af_register() by
 * devinet_init().
 */
2500 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2501 	.family		  = AF_INET,
2502 	.fill_link_af	  = inet_fill_link_af,
2503 	.get_link_af_size = inet_get_link_af_size,
2504 	.validate_link_af = inet_validate_link_af,
2505 	.set_link_af	  = inet_set_link_af,
2506 };
2507 
2508 void __init devinet_init(void)
2509 {
2510 	int i;
2511 
2512 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2513 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2514 
2515 	register_pernet_subsys(&devinet_ops);
2516 
2517 	register_gifconf(PF_INET, inet_gifconf);
2518 	register_netdevice_notifier(&ip_netdev_notifier);
2519 
2520 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2521 
2522 	rtnl_af_register(&inet_af_ops);
2523 
2524 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2525 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2526 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2527 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2528 		      inet_netconf_dump_devconf, 0);
2529 }
2530