xref: /openbmc/linux/net/ipv4/devinet.c (revision 256ac037)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
/* Built-in defaults for the IPV4_DEVCONF_* table.  Entries are 1-based
 * netlink attribute ids, hence the "- 1" when indexing .data[].
 * Unlisted entries default to 0.  The IGMP report intervals are in
 * milliseconds.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
79 
/* Per-device default configuration.  Same defaults as ipv4_devconf plus
 * ACCEPT_SOURCE_ROUTE.  inetdev_init() copies the per-netns instance of
 * this table (net->ipv4.devconf_dflt) into every newly created in_device.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
91 
/* Fetch attribute @attr from the per-netns default devconf table. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

/* Netlink attribute policy used by the nlmsg_parse() calls in
 * inet_rtm_deladdr()/rtm_to_ifaddr() to validate RTM_NEWADDR /
 * RTM_DELADDR requests.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};
103 
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/* Global hash table of all in_ifaddrs, bucketed by ifa_local (see
 * inet_addr_hash()).  Written under RTNL, read under RCU.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
/* Unlink @ifa from the global address hash (RCU-safe removal).
 * Caller must hold RTNL.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
129 
/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	u32 hash = inet_addr_hash(net, addr);
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
		if (ifa->ifa_local == addr) {
			struct net_device *dev = ifa->ifa_dev->dev;

			/* The hash table is global; skip matches that
			 * belong to a different network namespace.
			 */
			if (!net_eq(dev_net(dev), net))
				continue;
			result = dev;
			break;
		}
	}
	if (!result) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	}
	/* dev_hold() is taken while still inside the RCU section, so the
	 * device cannot be freed between lookup and hold.
	 */
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);
175 
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain fired with NETDEV_UP/NETDEV_DOWN when an IPv4 address
 * is added to or removed from a device.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* No-op stubs when sysctl support is compiled out: registration
 * trivially succeeds so callers need no #ifdefs.
 */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193 
194 /* Locks all the inet devices. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
/* RCU callback: drop the ifa's reference on its in_device (if any) and
 * free the ifa itself.  Runs after a grace period, scheduled by
 * inet_free_ifa().
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}
208 
/* Defer freeing of @ifa until after an RCU grace period, so lockless
 * readers still traversing the hash/device lists stay safe.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
213 
/* Final teardown of an in_device.  By this point no addresses or
 * multicast entries may remain (hence the WARN_ONs), and idev->dead is
 * expected to be set; an "alive" idev is reported and deliberately
 * leaked rather than freed.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	/* Release the device reference taken in inetdev_init(). */
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
231 
/* Create and attach an in_device to @dev.  Called under RTNL.
 * Seeds the config from the per-netns defaults, allocates ARP
 * parameters, registers sysctls and finally publishes the in_device
 * via dev->ip_ptr.  Returns the new in_device or an ERR_PTR().
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	/* Sysctl handles are per-device; do not inherit the template's. */
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	err = devinet_sysctl_register(in_dev);
	if (err) {
		/* Mark dead before the put so the final release frees it. */
		in_dev->dead = 1;
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev ?: ERR_PTR(err);
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
276 
/* RCU callback: drop the reference held for dev->ip_ptr once no
 * readers can still see the pointer.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
282 
/* Tear down an in_device: mark it dead, drop all addresses, unpublish
 * dev->ip_ptr and schedule the final reference drop via RCU.
 * Called under RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* Setting dead first makes __inet_del_ifa() skip promotions. */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
309 
/* Return 1 if @a falls within one of @in_dev's primary subnets and,
 * when @b is nonzero, @b matches the same subnet; 0 otherwise.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();
	return 0;
}
324 
/* Remove the address *@ifap from @in_dev.  Deleting a primary address
 * also deletes its secondaries, unless promote_secondaries is enabled,
 * in which case the first matching secondary is promoted to primary.
 * Emits RTM_DELADDR (and RTM_NEWADDR for a promotion) and fires the
 * inetaddr notifier chain.  @ifa1 is freed only when @destroy is set;
 * otherwise ownership stays with the caller.  Called under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* Device is going away: no point promoting anything. */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary of equal-or-wider scope:
			 * a promoted entry is re-linked after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Re-link the promoted entry right after the last primary
		 * so list order keeps primaries before secondaries.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for remaining secondaries with the new
		 * primary as prefsrc (removed silently above).
		 */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
428 
/* Convenience wrapper around __inet_del_ifa() with no originating
 * netlink message (nlh == NULL, portid == 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
434 
static void check_lifetime(struct work_struct *work);

/* Periodic worker that expires / deprecates addresses with finite
 * valid/preferred lifetimes (see check_lifetime() below).
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
438 
/* Insert @ifa into its device's address list and the global hash.
 * An address whose subnet already has a primary becomes a secondary
 * (appended at the tail); a new primary is linked after the last
 * primary of equal-or-wider scope.  Returns 0, -EEXIST for an exact
 * duplicate, or -EINVAL on scope mismatch; @ifa is consumed (freed)
 * on every non-insertion path.  Called under RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	/* A zero local address is silently dropped, not an error. */
	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			/* Same subnet as an existing entry: demote. */
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Kick the lifetime worker so the new address is accounted for. */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
495 
/* Insert @ifa without an originating netlink message. */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
500 
/* Bind @ifa to @dev's in_device and insert it.  Consumes @ifa on
 * failure (-ENOBUFS if the device has no in_device).  Loopback
 * addresses are forced to host scope.  Called under RTNL.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		/* Take the in_device reference released later by
		 * inet_rcu_free_ifa().
		 */
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
522 
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	/* NULL if the ifindex is unknown or the device has no IPv4 state. */
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
539 
/* Called only from RTNL semaphored context. No locks. */

/* Return the first primary address on @in_dev whose mask equals @mask
 * and whose subnet contains @prefix; NULL if none matches.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	ASSERT_RTNL();

	for_primary_ifa(in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	} endfor_ifa(in_dev);
	return NULL;
}
553 
/* Join (@join true) or leave the multicast group given by
 * @ifa->ifa_address on the ifa's interface, using kernel socket @sk.
 * Returns the ip_mc_join_group()/ip_mc_leave_group() result.
 * Called under RTNL.
 */
static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
{
	struct ip_mreqn mreq = {
		.imr_multiaddr.s_addr = ifa->ifa_address,
		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
	};
	int ret;

	ASSERT_RTNL();

	lock_sock(sk);
	if (join)
		ret = ip_mc_join_group(sk, &mreq);
	else
		ret = ip_mc_leave_group(sk, &mreq);
	release_sock(sk);

	return ret;
}
573 
/* RTM_DELADDR handler: parse the request, find the first address on the
 * target device matching all supplied attributes (IFA_LOCAL, IFA_LABEL,
 * IFA_ADDRESS/prefixlen) and delete it.  Returns 0 on success,
 * -ENODEV / -EADDRNOTAVAIL / parse errors otherwise.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (!in_dev) {
		err = -ENODEV;
		goto errout;
	}

	/* Each absent attribute acts as a wildcard. */
	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
			continue;

		/* Leave any auto-joined multicast group first. */
		if (ipv4_is_multicast(ifa->ifa_address))
			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
622 
#define INFINITY_LIFE_TIME	0xFFFFFFFF	/* "never expires" sentinel */

/* Delayed-work handler: walk every hash bucket and expire addresses
 * whose valid lifetime has passed (delete) or whose preferred lifetime
 * has passed (mark IFA_F_DEPRECATED).  A first lockless RCU pass finds
 * buckets that need changes; only those are revisited under RTNL.
 * Finally re-arms itself for the next soonest deadline.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan under RCU, no RTNL needed. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: mutate under RTNL (deletion needs it). */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* inet_del_ifa() needs the list slot that
				 * points at ifa, so locate it first.
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
722 
/* Apply valid/preferred lifetimes (seconds, INFINITY_LIFE_TIME for
 * "forever") to @ifa.  An infinite valid lifetime sets IFA_F_PERMANENT;
 * a zero preferred lifetime sets IFA_F_DEPRECATED immediately.
 * Also stamps ifa_tstamp (and ifa_cstamp on first use).
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
746 
/* Build an in_ifaddr from an RTM_NEWADDR request.  Validates the
 * message (prefixlen <= 32, IFA_LOCAL present), resolves the target
 * device and fills in address, label, flags and (via @pvalid_lft /
 * @pprefered_lft) any IFA_CACHEINFO lifetimes.  Returns the new ifa
 * (holding a reference on the in_device) or an ERR_PTR().
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  NULL);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential in_dev allocation can be left alive; it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference dropped by inet_rcu_free_ifa() when the ifa dies. */
	in_dev_hold(in_dev);

	/* IFA_ADDRESS defaults to IFA_LOCAL (non point-to-point case). */
	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* preferred must not exceed valid; zero valid is invalid. */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
830 
831 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
832 {
833 	struct in_device *in_dev = ifa->ifa_dev;
834 	struct in_ifaddr *ifa1, **ifap;
835 
836 	if (!ifa->ifa_local)
837 		return NULL;
838 
839 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
840 	     ifap = &ifa1->ifa_next) {
841 		if (ifa1->ifa_mask == ifa->ifa_mask &&
842 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
843 		    ifa1->ifa_local == ifa->ifa_local)
844 			return ifa1;
845 	}
846 	return NULL;
847 }
848 
/* RTM_NEWADDR handler: create a new address, or — when an identical
 * address already exists and NLM_F_REPLACE was given — refresh its
 * lifetimes.  Returns 0, -EEXIST (exists without NLM_F_REPLACE, or
 * NLM_F_EXCL set), or an error from parsing/insertion.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
					       true, ifa);

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
	} else {
		/* The freshly built ifa is redundant; update the old one. */
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
	}
	return 0;
}
895 
896 /*
897  *	Determine a default network mask, based on the IP address.
898  */
899 
900 static int inet_abc_len(__be32 addr)
901 {
902 	int rc = -1;	/* Something else, probably a multicast. */
903 
904 	if (ipv4_is_zeronet(addr))
905 		rc = 0;
906 	else {
907 		__u32 haddr = ntohl(addr);
908 
909 		if (IN_CLASSA(haddr))
910 			rc = 8;
911 		else if (IN_CLASSB(haddr))
912 			rc = 16;
913 		else if (IN_CLASSC(haddr))
914 			rc = 24;
915 	}
916 
917 	return rc;
918 }
919 
920 
/* Legacy SIOC[GS]IF* ioctl multiplexer for IPv4 addresses.  Copies the
 * caller's ifreq, locates the target device and (by label, optionally
 * label+address) the matching ifaddr, then dispatches on @cmd.  "Get"
 * commands copy the result back to userspace; "set" commands require
 * CAP_NET_ADMIN and rewrite the address list under RTNL.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* "eth0:1" style alias: strip the label suffix for dev lookup. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full "dev:label" name for the label matches below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* SIOCSIFADDR / SIOCSIFFLAGS may legitimately run without an
	 * existing address; everything else needs one.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias form: downing the alias deletes its address;
			 * device flags themselves are left untouched.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Remove then re-insert so FIB/notifiers see it. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default mask/broadcast. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" success path: copy the filled ifreq back to the caller. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1162 
1163 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1164 {
1165 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1166 	struct in_ifaddr *ifa;
1167 	struct ifreq ifr;
1168 	int done = 0;
1169 
1170 	if (!in_dev)
1171 		goto out;
1172 
1173 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1174 		if (!buf) {
1175 			done += sizeof(ifr);
1176 			continue;
1177 		}
1178 		if (len < (int) sizeof(ifr))
1179 			break;
1180 		memset(&ifr, 0, sizeof(struct ifreq));
1181 		strcpy(ifr.ifr_name, ifa->ifa_label);
1182 
1183 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1184 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1185 								ifa->ifa_local;
1186 
1187 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1188 			done = -EFAULT;
1189 			break;
1190 		}
1191 		buf  += sizeof(struct ifreq);
1192 		len  -= sizeof(struct ifreq);
1193 		done += sizeof(struct ifreq);
1194 	}
1195 out:
1196 	return done;
1197 }
1198 
/* Return the first primary address on @in_dev whose scope is within
 * @scope, skipping link-scoped addresses; 0 if none qualifies.
 */
static __be32 in_dev_select_addr(const struct in_device *in_dev,
				 int scope)
{
	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope != RT_SCOPE_LINK &&
		    ifa->ifa_scope <= scope)
			return ifa->ifa_local;
	} endfor_ifa(in_dev);

	return 0;
}
1210 
/* Select a source address on @dev with scope no wider than @scope,
 * preferring an address in the same subnet as @dst.  If @dev has no
 * suitable address, fall back to the VRF master device (if any) and
 * then to every device in the same L3 domain.  Returns 0 on failure.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		/* A subnet match for dst wins outright; otherwise remember
		 * the first in-scope address as a fallback.
		 */
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
1272 EXPORT_SYMBOL(inet_select_addr);
1273 
/* Scan @in_dev looking for evidence that (@local, @dst) is usable here:
 * "addr" tracks a candidate local address within @scope, "same" records
 * that an ifa matched both @local and @dst (0 acts as a wildcard for
 * either).  Returns the confirmed address, or 0 if no ifa satisfies
 * both conditions.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1310 
1311 /*
1312  * Confirm that local IP address exists using wildcards:
1313  * - net: netns to check, cannot be NULL
1314  * - in_dev: only on this interface, NULL=any interface
1315  * - dst: only in the same subnet as dst, 0=any dst
1316  * - local: address, 0=autoselect the local address
1317  * - scope: maximum allowed scope value for the local address
1318  */
1319 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1320 			 __be32 dst, __be32 local, int scope)
1321 {
1322 	__be32 addr = 0;
1323 	struct net_device *dev;
1324 
1325 	if (in_dev)
1326 		return confirm_addr_indev(in_dev, dst, local, scope);
1327 
1328 	rcu_read_lock();
1329 	for_each_netdev_rcu(net, dev) {
1330 		in_dev = __in_dev_get_rcu(dev);
1331 		if (in_dev) {
1332 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1333 			if (addr)
1334 				break;
1335 		}
1336 	}
1337 	rcu_read_unlock();
1338 
1339 	return addr;
1340 }
1341 EXPORT_SYMBOL(inet_confirm_addr);
1342 
1343 /*
1344  *	Device notifier
1345  */
1346 
/* Register @nb on the blocking inetaddr notifier chain. */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
1351 EXPORT_SYMBOL(register_inetaddr_notifier);
1352 
/* Remove @nb from the blocking inetaddr notifier chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
1357 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1358 
1359 /* Rename ifa_labels for a device name change. Make some effort to preserve
1360  * existing alias numbering and to create unique labels if possible.
1361 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* First label is the primary address: plain device name. */
		if (named++ == 0)
			goto skip;
		/* Preserve the old ":alias" suffix; synthesize a numeric
		 * one if the old label had none.
		 */
		dot = strchr(old, ':');
		if (!dot) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* Not enough room: overwrite the label tail so the
			 * suffix still fits within IFNAMSIZ.
			 */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1387 
/* An interface can carry IPv4 only if its MTU is at least 68 bytes,
 * the minimum datagram size every IPv4 host must accept (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return !(mtu < 68);
}
1392 
/* Send one gratuitous ARP request per address configured on @dev. */
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
	struct in_ifaddr *ifa;

	for (ifa = in_dev->ifa_list; ifa;
	     ifa = ifa->ifa_next) {
		/* Sender and target IP are both ifa_local: gratuitous ARP. */
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
}
1407 
1408 /* Called only under RTNL semaphore */
1409 
/* React to netdevice lifecycle events: create/destroy the per-device
 * in_device, manage multicast state, and resend ARP/labels as needed.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback traffic skips xfrm policy checks. */
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* An in_device must not already exist at registration. */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 on the loopback device. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1502 
/* Netdevice event notifier; handled by inetdev_event() above. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1506 
1507 static size_t inet_nlmsg_size(void)
1508 {
1509 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1510 	       + nla_total_size(4) /* IFA_ADDRESS */
1511 	       + nla_total_size(4) /* IFA_LOCAL */
1512 	       + nla_total_size(4) /* IFA_BROADCAST */
1513 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1514 	       + nla_total_size(4)  /* IFA_FLAGS */
1515 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1516 }
1517 
1518 static inline u32 cstamp_delta(unsigned long cstamp)
1519 {
1520 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1521 }
1522 
1523 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1524 			 unsigned long tstamp, u32 preferred, u32 valid)
1525 {
1526 	struct ifa_cacheinfo ci;
1527 
1528 	ci.cstamp = cstamp_delta(cstamp);
1529 	ci.tstamp = cstamp_delta(tstamp);
1530 	ci.ifa_prefered = preferred;
1531 	ci.ifa_valid = valid;
1532 
1533 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1534 }
1535 
/* Fill one address message (@event, e.g. RTM_NEWADDR) for @ifa into
 * @skb.  Returns 0, or -EMSGSIZE when @skb has no room, in which case
 * the partial message is cancelled.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		/* Report the lifetimes remaining from now, clamped at 0,
		 * rather than the absolute configured values.
		 */
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Zero-valued address fields are simply omitted from the message. */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1595 
/* Netlink dump of all IPv4 addresses in the namespace.  Resume state
 * between dump chunks lives in cb->args[]: [0] device hash bucket,
 * [1] device index within the bucket, [2] address index on the device.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence number lets nl_dump_check_consistent() flag
		 * dumps that raced with address/device changes.
		 */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					/* skb is full: save position below. */
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1652 
/* Broadcast an @event message about @ifa to RTNLGRP_IPV4_IFADDR
 * listeners; on allocation or fill failure the error is reported on the
 * rtnetlink socket instead.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1679 
1680 static size_t inet_get_link_af_size(const struct net_device *dev,
1681 				    u32 ext_filter_mask)
1682 {
1683 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1684 
1685 	if (!in_dev)
1686 		return 0;
1687 
1688 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1689 }
1690 
1691 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1692 			     u32 ext_filter_mask)
1693 {
1694 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1695 	struct nlattr *nla;
1696 	int i;
1697 
1698 	if (!in_dev)
1699 		return -ENODATA;
1700 
1701 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1702 	if (!nla)
1703 		return -EMSGSIZE;
1704 
1705 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1706 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1707 
1708 	return 0;
1709 }
1710 
/* Policy for nested IFLA_INET_* attributes (see inet_validate_link_af). */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1714 
1715 static int inet_validate_link_af(const struct net_device *dev,
1716 				 const struct nlattr *nla)
1717 {
1718 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1719 	int err, rem;
1720 
1721 	if (dev && !__in_dev_get_rtnl(dev))
1722 		return -EAFNOSUPPORT;
1723 
1724 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1725 	if (err < 0)
1726 		return err;
1727 
1728 	if (tb[IFLA_INET_CONF]) {
1729 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1730 			int cfgid = nla_type(a);
1731 
1732 			if (nla_len(a) < 4)
1733 				return -EINVAL;
1734 
1735 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1736 				return -EINVAL;
1737 		}
1738 	}
1739 
1740 	return 0;
1741 }
1742 
/* Apply IFLA_INET_CONF settings to @dev's devconf.  The attributes are
 * expected to have passed inet_validate_link_af() already — hence a
 * parse failure here is treated as a kernel bug.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1762 
1763 static int inet_netconf_msgsize_devconf(int type)
1764 {
1765 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1766 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1767 	bool all = false;
1768 
1769 	if (type == NETCONFA_ALL)
1770 		all = true;
1771 
1772 	if (all || type == NETCONFA_FORWARDING)
1773 		size += nla_total_size(4);
1774 	if (all || type == NETCONFA_RP_FILTER)
1775 		size += nla_total_size(4);
1776 	if (all || type == NETCONFA_MC_FORWARDING)
1777 		size += nla_total_size(4);
1778 	if (all || type == NETCONFA_PROXY_NEIGH)
1779 		size += nla_total_size(4);
1780 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1781 		size += nla_total_size(4);
1782 
1783 	return size;
1784 }
1785 
/* Fill one netconf message for @devconf into @skb.  @type selects a
 * single NETCONFA_* attribute or NETCONFA_ALL for every one; a NULL
 * @devconf emits only NETCONFA_IFINDEX.  Returns 0 or -EMSGSIZE.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	/* NETCONFA_PROXY_NEIGH is backed by the PROXY_ARP devconf field. */
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1841 
/* Broadcast an @event netconf message for attribute @type of @devconf
 * to RTNLGRP_IPV4_NETCONF; failures are reported on the rtnetlink
 * socket via rtnl_set_sk_err().
 */
void inet_netconf_notify_devconf(struct net *net, int event, int type,
				 int ifindex, struct ipv4_devconf *devconf)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
					event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
1866 
/* Attribute policy for RTM_GETNETCONF requests. */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1874 
/* RTM_GETNETCONF handler: look up the devconf selected by
 * NETCONFA_IFINDEX (a real ifindex, NETCONFA_IFINDEX_ALL or
 * NETCONFA_IFINDEX_DEFAULT) and unicast a full RTM_NEWNETCONF reply.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct netconfmsg *ncm;
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
			  devconf_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
1936 
/* RTM_GETNETCONF dump: walk every device (resume state in cb->args[0/1]
 * as hash bucket / device index), then append two pseudo-entries for
 * "all" (h == NETDEV_HASHENTRIES) and "default" (h == NETDEV_HASHENTRIES
 * + 1) devconf tables.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Detect configuration changes racing with the dump. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2007 
2008 #ifdef CONFIG_SYSCTL
2009 
2010 static void devinet_copy_dflt_conf(struct net *net, int i)
2011 {
2012 	struct net_device *dev;
2013 
2014 	rcu_read_lock();
2015 	for_each_netdev_rcu(net, dev) {
2016 		struct in_device *in_dev;
2017 
2018 		in_dev = __in_dev_get_rcu(dev);
2019 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2020 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2021 	}
2022 	rcu_read_unlock();
2023 }
2024 
2025 /* called with RTNL locked */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* Redirect acceptance is toggled opposite to forwarding. */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	/* Apply the new setting to each device and notify per device. */
	for_each_netdev(net, dev) {
		struct in_device *in_dev;

		/* LRO is turned off whenever forwarding is enabled. */
		if (on)
			dev_disable_lro(dev);

		in_dev = __in_dev_get_rtnl(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
	}
}
2057 
2058 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2059 {
2060 	if (cnf == net->ipv4.devconf_dflt)
2061 		return NETCONFA_IFINDEX_DEFAULT;
2062 	else if (cnf == net->ipv4.devconf_all)
2063 		return NETCONFA_IFINDEX_ALL;
2064 	else {
2065 		struct in_device *idev
2066 			= container_of(cnf, struct in_device, cnf);
2067 		return idev->dev->ifindex;
2068 	}
2069 }
2070 
/* Generic sysctl handler for devconf entries: performs the integer
 * write, marks the slot as explicitly set, propagates default-table
 * changes, flushes the route cache and sends netconf notifications for
 * the entries that need them.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Slot index of this entry within cnf->data[]. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		/* Mark as explicitly set so devinet_copy_dflt_conf()
		 * no longer overwrites this device's value.
		 */
		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2119 
/* sysctl handler for "forwarding".  On a value change it takes RTNL —
 * restarting the syscall if the lock is contended — and propagates the
 * change, disabling LRO and flushing the route cache where needed.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		/* The default-table entry only needs a notification;
		 * everything else must run under RTNL.
		 */
		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2163 
2164 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2165 				void __user *buffer,
2166 				size_t *lenp, loff_t *ppos)
2167 {
2168 	int *valp = ctl->data;
2169 	int val = *valp;
2170 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2171 	struct net *net = ctl->extra2;
2172 
2173 	if (write && *valp != val)
2174 		rt_cache_flush(net);
2175 
2176 	return ret;
2177 }
2178 
/* Build one ctl_table entry for devconf field IPV4_DEVCONF_<attr>.
 * ->data points into the ipv4_devconf template and ->extra1 stores the
 * owning table, which handlers such as devinet_conf_proc() read back.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Writable (0644) entry using the generic devconf handler. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry using the generic devconf handler. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Writable entry with a custom proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Writable entry whose handler also flushes the route cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2201 
/* Template for the per-device sysctl tree under net/ipv4/conf/<dev>.
 * __devinet_sysctl_register() kmemdup()s this whole structure for each
 * conf it registers; the last (zero-filled) devinet_vars slot is the
 * ctl_table sentinel.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2252 
/* Duplicate the devinet_sysctl template, rebase every entry onto the
 * devconf @p of @net, and register the tree under
 * net/ipv4/conf/<dev_name>.  On success stores the table in p->sysctl
 * and emits an RTM_NEWNETCONF(NETCONFA_ALL) notification for @ifindex.
 * Returns 0, or -ENOBUFS on any allocation/registration failure.
 */
static int __devinet_sysctl_register(struct net *net, char *dev_name,
				     int ifindex, struct ipv4_devconf *p)
{
	int i;
	struct devinet_sysctl_table *t;
	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];

	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	/* "- 1" skips the zero-filled sentinel entry.  The template's
	 * .data points into the global ipv4_devconf; shift it by the
	 * offset between @p and the template so it lands on @p's copy
	 * of the same field.
	 */
	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
		t->devinet_vars[i].extra1 = p;
		t->devinet_vars[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);

	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl = t;

	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
				    ifindex, p);
	return 0;

free:
	kfree(t);
out:
	return -ENOBUFS;
}
2287 
2288 static void __devinet_sysctl_unregister(struct net *net,
2289 					struct ipv4_devconf *cnf, int ifindex)
2290 {
2291 	struct devinet_sysctl_table *t = cnf->sysctl;
2292 
2293 	if (t) {
2294 		cnf->sysctl = NULL;
2295 		unregister_net_sysctl_table(t->sysctl_header);
2296 		kfree(t);
2297 	}
2298 
2299 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2300 }
2301 
2302 static int devinet_sysctl_register(struct in_device *idev)
2303 {
2304 	int err;
2305 
2306 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2307 		return -EINVAL;
2308 
2309 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2310 	if (err)
2311 		return err;
2312 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2313 					idev->dev->ifindex, &idev->cnf);
2314 	if (err)
2315 		neigh_sysctl_unregister(idev->arp_parms);
2316 	return err;
2317 }
2318 
2319 static void devinet_sysctl_unregister(struct in_device *idev)
2320 {
2321 	struct net *net = dev_net(idev->dev);
2322 
2323 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2324 	neigh_sysctl_unregister(idev->arp_parms);
2325 }
2326 
/* /proc/sys/net/ipv4/ip_forward: legacy alias sharing storage with the
 * global devconf FORWARDING slot.  For a non-init netns,
 * devinet_init_net() duplicates this table and repoints
 * .data/.extra1/.extra2 at that netns' own devconf and net.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2340 #endif
2341 
/* Per-netns init: set up the "all" and "default" devconfs and their
 * sysctl trees, plus the ip_forward alias table.
 *
 * init_net uses the global ipv4_devconf/ipv4_devconf_dflt objects and
 * the static ctl_forward_entry directly; every other netns gets
 * kmemdup()'d private copies so its sysctls are independent.
 * Errors unwind in strict reverse order of the setup steps.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Private copies for a non-init netns */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (!all)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (!dflt)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (!tbl)
			goto err_alloc_ctl;

		/* Repoint ip_forward at this netns' "all" devconf */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	/* Only free what was actually duplicated (non-init netns) */
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2414 
/* Per-netns teardown: unregister the sysctl trees created by
 * devinet_init_net(), then free the netns' devconf copies.
 * The ip_forward table pointer is fetched from the header before the
 * header is unregistered so it can still be freed afterwards.
 * Note: for init_net the devconfs are the global objects; kfree() on
 * them is not reached because init_net is never torn down.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2431 
/* Pernet hooks: run devinet_init_net()/devinet_exit_net() for every
 * network namespace as it is created/destroyed.
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2436 
/* rtnetlink per-address-family ops: fill/validate/apply the AF_INET
 * portion of IFLA_AF_SPEC in link messages.
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2444 
/* Boot-time initialization of IPv4 device support: address hash table,
 * pernet devconf setup, gifconf/netdev hooks, the address-lifetime
 * worker, and the rtnetlink handlers for address and netconf messages.
 */
void __init devinet_init(void)
{
	int i;

	/* Empty the ifaddr lookup hash buckets */
	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Kick off periodic expiry of addresses with finite lifetimes */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, NULL);
}
2467