xref: /openbmc/linux/net/ipv4/devinet.c (revision 95e9fd10)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 #include "fib_lookup.h"
67 
68 static struct ipv4_devconf ipv4_devconf = {
69 	.data = {
70 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74 	},
75 };
76 
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78 	.data = {
79 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84 	},
85 };
86 
/*
 * Read devconf attribute @attr from the per-namespace default settings
 * (net->ipv4.devconf_dflt).  @net is parenthesized so that any pointer
 * expression, not just a plain identifier, can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89 
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91 	[IFA_LOCAL]     	= { .type = NLA_U32 },
92 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
93 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
94 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96 
97 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
98  * value.  So if you change this define, make appropriate changes to
99  * inet_addr_hash as well.
100  */
101 #define IN4_ADDR_HSIZE	256
102 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
103 static DEFINE_SPINLOCK(inet_addr_hash_lock);
104 
105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
106 {
107 	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
108 
109 	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
110 		(IN4_ADDR_HSIZE - 1));
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	unsigned int hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 	struct hlist_node *node;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146 		struct net_device *dev = ifa->ifa_dev->dev;
147 
148 		if (!net_eq(dev_net(dev), net))
149 			continue;
150 		if (ifa->ifa_local == addr) {
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fallback to FIB local table so that communication
161 		 * over loopback subnets work.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175 
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 			 int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static inline void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static inline void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192 
193 /* Locks all the inet devices. */
194 
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199 
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 	if (ifa->ifa_dev)
204 		in_dev_put(ifa->ifa_dev);
205 	kfree(ifa);
206 }
207 
208 static inline void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212 
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215 	struct net_device *dev = idev->dev;
216 
217 	WARN_ON(idev->ifa_list);
218 	WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222 	dev_put(dev);
223 	if (!idev->dead)
224 		pr_err("Freeing alive in_device %p\n", idev);
225 	else
226 		kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229 
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232 	struct in_device *in_dev;
233 
234 	ASSERT_RTNL();
235 
236 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237 	if (!in_dev)
238 		goto out;
239 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240 			sizeof(in_dev->cnf));
241 	in_dev->cnf.sysctl = NULL;
242 	in_dev->dev = dev;
243 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244 	if (!in_dev->arp_parms)
245 		goto out_kfree;
246 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247 		dev_disable_lro(dev);
248 	/* Reference in_dev->dev */
249 	dev_hold(dev);
250 	/* Account for reference dev->ip_ptr (below) */
251 	in_dev_hold(in_dev);
252 
253 	devinet_sysctl_register(in_dev);
254 	ip_mc_init_dev(in_dev);
255 	if (dev->flags & IFF_UP)
256 		ip_mc_up(in_dev);
257 
258 	/* we can receive as soon as ip_ptr is set -- do this last */
259 	rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261 	return in_dev;
262 out_kfree:
263 	kfree(in_dev);
264 	in_dev = NULL;
265 	goto out;
266 }
267 
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
271 	in_dev_put(idev);
272 }
273 
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276 	struct in_ifaddr *ifa;
277 	struct net_device *dev;
278 
279 	ASSERT_RTNL();
280 
281 	dev = in_dev->dev;
282 
283 	in_dev->dead = 1;
284 
285 	ip_mc_destroy_dev(in_dev);
286 
287 	while ((ifa = in_dev->ifa_list) != NULL) {
288 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289 		inet_free_ifa(ifa);
290 	}
291 
292 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 
294 	devinet_sysctl_unregister(in_dev);
295 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296 	arp_ifdown(dev);
297 
298 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300 
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303 	rcu_read_lock();
304 	for_primary_ifa(in_dev) {
305 		if (inet_ifa_match(a, ifa)) {
306 			if (!b || inet_ifa_match(b, ifa)) {
307 				rcu_read_unlock();
308 				return 1;
309 			}
310 		}
311 	} endfor_ifa(in_dev);
312 	rcu_read_unlock();
313 	return 0;
314 }
315 
/*
 * Remove the address *@ifap from @in_dev's address list.  Must be called
 * with RTNL held.
 *
 * @in_dev:  device state owning the list
 * @ifap:    pointer to the list link that currently points at the
 *           address to delete
 * @destroy: if non-zero, the deleted address is also freed
 * @nlh:     netlink request passed through to rtmsg_ifa() (may be NULL)
 * @pid:     netlink pid passed through to rtmsg_ifa()
 *
 * When a primary address is deleted, its secondaries (same mask, same
 * subnet) are either deleted with it or, if secondary promotion is
 * enabled on the device, the first such secondary is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	/* Secondary chosen for promotion, if any. */
	struct in_ifaddr *promote = NULL;
	/* ifa1 is the address being deleted; ifa is a scratch cursor. */
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* Last primary seen whose scope is not narrower than ifa1's. */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* Entry preceding the promoted secondary (NULL if directly after ifa1). */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: drop this secondary as well. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the tail before promote is relinked below. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted entry to just after the last primary. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
415 
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417 			 int destroy)
418 {
419 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421 
422 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
423 			     u32 pid)
424 {
425 	struct in_device *in_dev = ifa->ifa_dev;
426 	struct in_ifaddr *ifa1, **ifap, **last_primary;
427 
428 	ASSERT_RTNL();
429 
430 	if (!ifa->ifa_local) {
431 		inet_free_ifa(ifa);
432 		return 0;
433 	}
434 
435 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
436 	last_primary = &in_dev->ifa_list;
437 
438 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
439 	     ifap = &ifa1->ifa_next) {
440 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
441 		    ifa->ifa_scope <= ifa1->ifa_scope)
442 			last_primary = &ifa1->ifa_next;
443 		if (ifa1->ifa_mask == ifa->ifa_mask &&
444 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
445 			if (ifa1->ifa_local == ifa->ifa_local) {
446 				inet_free_ifa(ifa);
447 				return -EEXIST;
448 			}
449 			if (ifa1->ifa_scope != ifa->ifa_scope) {
450 				inet_free_ifa(ifa);
451 				return -EINVAL;
452 			}
453 			ifa->ifa_flags |= IFA_F_SECONDARY;
454 		}
455 	}
456 
457 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
458 		net_srandom(ifa->ifa_local);
459 		ifap = last_primary;
460 	}
461 
462 	ifa->ifa_next = *ifap;
463 	*ifap = ifa;
464 
465 	inet_hash_insert(dev_net(in_dev->dev), ifa);
466 
467 	/* Send message first, then call notifier.
468 	   Notifier will trigger FIB update, so that
469 	   listeners of netlink will know about new ifaddr */
470 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
471 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
472 
473 	return 0;
474 }
475 
476 static int inet_insert_ifa(struct in_ifaddr *ifa)
477 {
478 	return __inet_insert_ifa(ifa, NULL, 0);
479 }
480 
481 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
482 {
483 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
484 
485 	ASSERT_RTNL();
486 
487 	if (!in_dev) {
488 		inet_free_ifa(ifa);
489 		return -ENOBUFS;
490 	}
491 	ipv4_devconf_setall(in_dev);
492 	if (ifa->ifa_dev != in_dev) {
493 		WARN_ON(ifa->ifa_dev);
494 		in_dev_hold(in_dev);
495 		ifa->ifa_dev = in_dev;
496 	}
497 	if (ipv4_is_loopback(ifa->ifa_local))
498 		ifa->ifa_scope = RT_SCOPE_HOST;
499 	return inet_insert_ifa(ifa);
500 }
501 
502 /* Caller must hold RCU or RTNL :
503  * We dont take a reference on found in_device
504  */
505 struct in_device *inetdev_by_index(struct net *net, int ifindex)
506 {
507 	struct net_device *dev;
508 	struct in_device *in_dev = NULL;
509 
510 	rcu_read_lock();
511 	dev = dev_get_by_index_rcu(net, ifindex);
512 	if (dev)
513 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
514 	rcu_read_unlock();
515 	return in_dev;
516 }
517 EXPORT_SYMBOL(inetdev_by_index);
518 
519 /* Called only from RTNL semaphored context. No locks. */
520 
521 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
522 				    __be32 mask)
523 {
524 	ASSERT_RTNL();
525 
526 	for_primary_ifa(in_dev) {
527 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
528 			return ifa;
529 	} endfor_ifa(in_dev);
530 	return NULL;
531 }
532 
533 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
534 {
535 	struct net *net = sock_net(skb->sk);
536 	struct nlattr *tb[IFA_MAX+1];
537 	struct in_device *in_dev;
538 	struct ifaddrmsg *ifm;
539 	struct in_ifaddr *ifa, **ifap;
540 	int err = -EINVAL;
541 
542 	ASSERT_RTNL();
543 
544 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
545 	if (err < 0)
546 		goto errout;
547 
548 	ifm = nlmsg_data(nlh);
549 	in_dev = inetdev_by_index(net, ifm->ifa_index);
550 	if (in_dev == NULL) {
551 		err = -ENODEV;
552 		goto errout;
553 	}
554 
555 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
556 	     ifap = &ifa->ifa_next) {
557 		if (tb[IFA_LOCAL] &&
558 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
559 			continue;
560 
561 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
562 			continue;
563 
564 		if (tb[IFA_ADDRESS] &&
565 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
566 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
567 			continue;
568 
569 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
570 		return 0;
571 	}
572 
573 	err = -EADDRNOTAVAIL;
574 errout:
575 	return err;
576 }
577 
578 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
579 {
580 	struct nlattr *tb[IFA_MAX+1];
581 	struct in_ifaddr *ifa;
582 	struct ifaddrmsg *ifm;
583 	struct net_device *dev;
584 	struct in_device *in_dev;
585 	int err;
586 
587 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
588 	if (err < 0)
589 		goto errout;
590 
591 	ifm = nlmsg_data(nlh);
592 	err = -EINVAL;
593 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
594 		goto errout;
595 
596 	dev = __dev_get_by_index(net, ifm->ifa_index);
597 	err = -ENODEV;
598 	if (dev == NULL)
599 		goto errout;
600 
601 	in_dev = __in_dev_get_rtnl(dev);
602 	err = -ENOBUFS;
603 	if (in_dev == NULL)
604 		goto errout;
605 
606 	ifa = inet_alloc_ifa();
607 	if (ifa == NULL)
608 		/*
609 		 * A potential indev allocation can be left alive, it stays
610 		 * assigned to its device and is destroy with it.
611 		 */
612 		goto errout;
613 
614 	ipv4_devconf_setall(in_dev);
615 	in_dev_hold(in_dev);
616 
617 	if (tb[IFA_ADDRESS] == NULL)
618 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
619 
620 	INIT_HLIST_NODE(&ifa->hash);
621 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
622 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
623 	ifa->ifa_flags = ifm->ifa_flags;
624 	ifa->ifa_scope = ifm->ifa_scope;
625 	ifa->ifa_dev = in_dev;
626 
627 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
628 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
629 
630 	if (tb[IFA_BROADCAST])
631 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
632 
633 	if (tb[IFA_LABEL])
634 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
635 	else
636 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
637 
638 	return ifa;
639 
640 errout:
641 	return ERR_PTR(err);
642 }
643 
644 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
645 {
646 	struct net *net = sock_net(skb->sk);
647 	struct in_ifaddr *ifa;
648 
649 	ASSERT_RTNL();
650 
651 	ifa = rtm_to_ifaddr(net, nlh);
652 	if (IS_ERR(ifa))
653 		return PTR_ERR(ifa);
654 
655 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
656 }
657 
658 /*
659  *	Determine a default network mask, based on the IP address.
660  */
661 
662 static inline int inet_abc_len(__be32 addr)
663 {
664 	int rc = -1;	/* Something else, probably a multicast. */
665 
666 	if (ipv4_is_zeronet(addr))
667 		rc = 0;
668 	else {
669 		__u32 haddr = ntohl(addr);
670 
671 		if (IN_CLASSA(haddr))
672 			rc = 8;
673 		else if (IN_CLASSB(haddr))
674 			rc = 16;
675 		else if (IN_CLASSC(haddr))
676 			rc = 24;
677 	}
678 
679 	return rc;
680 }
681 
682 
683 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
684 {
685 	struct ifreq ifr;
686 	struct sockaddr_in sin_orig;
687 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
688 	struct in_device *in_dev;
689 	struct in_ifaddr **ifap = NULL;
690 	struct in_ifaddr *ifa = NULL;
691 	struct net_device *dev;
692 	char *colon;
693 	int ret = -EFAULT;
694 	int tryaddrmatch = 0;
695 
696 	/*
697 	 *	Fetch the caller's info block into kernel space
698 	 */
699 
700 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
701 		goto out;
702 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
703 
704 	/* save original address for comparison */
705 	memcpy(&sin_orig, sin, sizeof(*sin));
706 
707 	colon = strchr(ifr.ifr_name, ':');
708 	if (colon)
709 		*colon = 0;
710 
711 	dev_load(net, ifr.ifr_name);
712 
713 	switch (cmd) {
714 	case SIOCGIFADDR:	/* Get interface address */
715 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
716 	case SIOCGIFDSTADDR:	/* Get the destination address */
717 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
718 		/* Note that these ioctls will not sleep,
719 		   so that we do not impose a lock.
720 		   One day we will be forced to put shlock here (I mean SMP)
721 		 */
722 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
723 		memset(sin, 0, sizeof(*sin));
724 		sin->sin_family = AF_INET;
725 		break;
726 
727 	case SIOCSIFFLAGS:
728 		ret = -EACCES;
729 		if (!capable(CAP_NET_ADMIN))
730 			goto out;
731 		break;
732 	case SIOCSIFADDR:	/* Set interface address (and family) */
733 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
734 	case SIOCSIFDSTADDR:	/* Set the destination address */
735 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
736 		ret = -EACCES;
737 		if (!capable(CAP_NET_ADMIN))
738 			goto out;
739 		ret = -EINVAL;
740 		if (sin->sin_family != AF_INET)
741 			goto out;
742 		break;
743 	default:
744 		ret = -EINVAL;
745 		goto out;
746 	}
747 
748 	rtnl_lock();
749 
750 	ret = -ENODEV;
751 	dev = __dev_get_by_name(net, ifr.ifr_name);
752 	if (!dev)
753 		goto done;
754 
755 	if (colon)
756 		*colon = ':';
757 
758 	in_dev = __in_dev_get_rtnl(dev);
759 	if (in_dev) {
760 		if (tryaddrmatch) {
761 			/* Matthias Andree */
762 			/* compare label and address (4.4BSD style) */
763 			/* note: we only do this for a limited set of ioctls
764 			   and only if the original address family was AF_INET.
765 			   This is checked above. */
766 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
767 			     ifap = &ifa->ifa_next) {
768 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
769 				    sin_orig.sin_addr.s_addr ==
770 							ifa->ifa_local) {
771 					break; /* found */
772 				}
773 			}
774 		}
775 		/* we didn't get a match, maybe the application is
776 		   4.3BSD-style and passed in junk so we fall back to
777 		   comparing just the label */
778 		if (!ifa) {
779 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
780 			     ifap = &ifa->ifa_next)
781 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
782 					break;
783 		}
784 	}
785 
786 	ret = -EADDRNOTAVAIL;
787 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
788 		goto done;
789 
790 	switch (cmd) {
791 	case SIOCGIFADDR:	/* Get interface address */
792 		sin->sin_addr.s_addr = ifa->ifa_local;
793 		goto rarok;
794 
795 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
796 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
797 		goto rarok;
798 
799 	case SIOCGIFDSTADDR:	/* Get the destination address */
800 		sin->sin_addr.s_addr = ifa->ifa_address;
801 		goto rarok;
802 
803 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
804 		sin->sin_addr.s_addr = ifa->ifa_mask;
805 		goto rarok;
806 
807 	case SIOCSIFFLAGS:
808 		if (colon) {
809 			ret = -EADDRNOTAVAIL;
810 			if (!ifa)
811 				break;
812 			ret = 0;
813 			if (!(ifr.ifr_flags & IFF_UP))
814 				inet_del_ifa(in_dev, ifap, 1);
815 			break;
816 		}
817 		ret = dev_change_flags(dev, ifr.ifr_flags);
818 		break;
819 
820 	case SIOCSIFADDR:	/* Set interface address (and family) */
821 		ret = -EINVAL;
822 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
823 			break;
824 
825 		if (!ifa) {
826 			ret = -ENOBUFS;
827 			ifa = inet_alloc_ifa();
828 			INIT_HLIST_NODE(&ifa->hash);
829 			if (!ifa)
830 				break;
831 			if (colon)
832 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
833 			else
834 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
835 		} else {
836 			ret = 0;
837 			if (ifa->ifa_local == sin->sin_addr.s_addr)
838 				break;
839 			inet_del_ifa(in_dev, ifap, 0);
840 			ifa->ifa_broadcast = 0;
841 			ifa->ifa_scope = 0;
842 		}
843 
844 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
845 
846 		if (!(dev->flags & IFF_POINTOPOINT)) {
847 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
848 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
849 			if ((dev->flags & IFF_BROADCAST) &&
850 			    ifa->ifa_prefixlen < 31)
851 				ifa->ifa_broadcast = ifa->ifa_address |
852 						     ~ifa->ifa_mask;
853 		} else {
854 			ifa->ifa_prefixlen = 32;
855 			ifa->ifa_mask = inet_make_mask(32);
856 		}
857 		ret = inet_set_ifa(dev, ifa);
858 		break;
859 
860 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
861 		ret = 0;
862 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
863 			inet_del_ifa(in_dev, ifap, 0);
864 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
865 			inet_insert_ifa(ifa);
866 		}
867 		break;
868 
869 	case SIOCSIFDSTADDR:	/* Set the destination address */
870 		ret = 0;
871 		if (ifa->ifa_address == sin->sin_addr.s_addr)
872 			break;
873 		ret = -EINVAL;
874 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
875 			break;
876 		ret = 0;
877 		inet_del_ifa(in_dev, ifap, 0);
878 		ifa->ifa_address = sin->sin_addr.s_addr;
879 		inet_insert_ifa(ifa);
880 		break;
881 
882 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
883 
884 		/*
885 		 *	The mask we set must be legal.
886 		 */
887 		ret = -EINVAL;
888 		if (bad_mask(sin->sin_addr.s_addr, 0))
889 			break;
890 		ret = 0;
891 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
892 			__be32 old_mask = ifa->ifa_mask;
893 			inet_del_ifa(in_dev, ifap, 0);
894 			ifa->ifa_mask = sin->sin_addr.s_addr;
895 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
896 
897 			/* See if current broadcast address matches
898 			 * with current netmask, then recalculate
899 			 * the broadcast address. Otherwise it's a
900 			 * funny address, so don't touch it since
901 			 * the user seems to know what (s)he's doing...
902 			 */
903 			if ((dev->flags & IFF_BROADCAST) &&
904 			    (ifa->ifa_prefixlen < 31) &&
905 			    (ifa->ifa_broadcast ==
906 			     (ifa->ifa_local|~old_mask))) {
907 				ifa->ifa_broadcast = (ifa->ifa_local |
908 						      ~sin->sin_addr.s_addr);
909 			}
910 			inet_insert_ifa(ifa);
911 		}
912 		break;
913 	}
914 done:
915 	rtnl_unlock();
916 out:
917 	return ret;
918 rarok:
919 	rtnl_unlock();
920 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
921 	goto out;
922 }
923 
924 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
925 {
926 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
927 	struct in_ifaddr *ifa;
928 	struct ifreq ifr;
929 	int done = 0;
930 
931 	if (!in_dev)
932 		goto out;
933 
934 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
935 		if (!buf) {
936 			done += sizeof(ifr);
937 			continue;
938 		}
939 		if (len < (int) sizeof(ifr))
940 			break;
941 		memset(&ifr, 0, sizeof(struct ifreq));
942 		if (ifa->ifa_label)
943 			strcpy(ifr.ifr_name, ifa->ifa_label);
944 		else
945 			strcpy(ifr.ifr_name, dev->name);
946 
947 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
948 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
949 								ifa->ifa_local;
950 
951 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
952 			done = -EFAULT;
953 			break;
954 		}
955 		buf  += sizeof(struct ifreq);
956 		len  -= sizeof(struct ifreq);
957 		done += sizeof(struct ifreq);
958 	}
959 out:
960 	return done;
961 }
962 
963 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
964 {
965 	__be32 addr = 0;
966 	struct in_device *in_dev;
967 	struct net *net = dev_net(dev);
968 
969 	rcu_read_lock();
970 	in_dev = __in_dev_get_rcu(dev);
971 	if (!in_dev)
972 		goto no_in_dev;
973 
974 	for_primary_ifa(in_dev) {
975 		if (ifa->ifa_scope > scope)
976 			continue;
977 		if (!dst || inet_ifa_match(dst, ifa)) {
978 			addr = ifa->ifa_local;
979 			break;
980 		}
981 		if (!addr)
982 			addr = ifa->ifa_local;
983 	} endfor_ifa(in_dev);
984 
985 	if (addr)
986 		goto out_unlock;
987 no_in_dev:
988 
989 	/* Not loopback addresses on loopback should be preferred
990 	   in this case. It is importnat that lo is the first interface
991 	   in dev_base list.
992 	 */
993 	for_each_netdev_rcu(net, dev) {
994 		in_dev = __in_dev_get_rcu(dev);
995 		if (!in_dev)
996 			continue;
997 
998 		for_primary_ifa(in_dev) {
999 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1000 			    ifa->ifa_scope <= scope) {
1001 				addr = ifa->ifa_local;
1002 				goto out_unlock;
1003 			}
1004 		} endfor_ifa(in_dev);
1005 	}
1006 out_unlock:
1007 	rcu_read_unlock();
1008 	return addr;
1009 }
1010 EXPORT_SYMBOL(inet_select_addr);
1011 
/*
 * Confirm a local address on a single in_device.
 *
 * @in_dev: device state whose address list is scanned
 * @dst:    required destination subnet, or 0 for any
 * @local:  required local address, or 0 to pick one
 * @scope:  largest acceptable scope value for the returned address
 *
 * Two things are tracked while walking the list: "addr", a candidate
 * local address within @scope (equal to @local when @local is given),
 * and "same", whether some address's subnet contains both @local and
 * @dst (each treated as a wildcard when 0).
 *
 * Returns the candidate address if a matching subnet was found,
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the first acceptable local address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet already confirmed earlier: done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1048 
1049 /*
1050  * Confirm that local IP address exists using wildcards:
1051  * - in_dev: only on this interface, 0=any interface
1052  * - dst: only in the same subnet as dst, 0=any dst
1053  * - local: address, 0=autoselect the local address
1054  * - scope: maximum allowed scope value for the local address
1055  */
1056 __be32 inet_confirm_addr(struct in_device *in_dev,
1057 			 __be32 dst, __be32 local, int scope)
1058 {
1059 	__be32 addr = 0;
1060 	struct net_device *dev;
1061 	struct net *net;
1062 
1063 	if (scope != RT_SCOPE_LINK)
1064 		return confirm_addr_indev(in_dev, dst, local, scope);
1065 
1066 	net = dev_net(in_dev->dev);
1067 	rcu_read_lock();
1068 	for_each_netdev_rcu(net, dev) {
1069 		in_dev = __in_dev_get_rcu(dev);
1070 		if (in_dev) {
1071 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1072 			if (addr)
1073 				break;
1074 		}
1075 	}
1076 	rcu_read_unlock();
1077 
1078 	return addr;
1079 }
1080 EXPORT_SYMBOL(inet_confirm_addr);
1081 
1082 /*
1083  *	Device notifier
1084  */
1085 
1086 int register_inetaddr_notifier(struct notifier_block *nb)
1087 {
1088 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1089 }
1090 EXPORT_SYMBOL(register_inetaddr_notifier);
1091 
1092 int unregister_inetaddr_notifier(struct notifier_block *nb)
1093 {
1094 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1095 }
1096 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1097 
1098 /* Rename ifa_labels for a device name change. Make some effort to preserve
1099  * existing alias numbering and to create unique labels if possible.
1100 */
1101 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1102 {
1103 	struct in_ifaddr *ifa;
1104 	int named = 0;
1105 
1106 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1107 		char old[IFNAMSIZ], *dot;
1108 
1109 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1110 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1111 		if (named++ == 0)
1112 			goto skip;
1113 		dot = strchr(old, ':');
1114 		if (dot == NULL) {
1115 			sprintf(old, ":%d", named);
1116 			dot = old;
1117 		}
1118 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1119 			strcat(ifa->ifa_label, dot);
1120 		else
1121 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1122 skip:
1123 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1124 	}
1125 }
1126 
/*
 * Tell whether @mtu is large enough to keep IPv4 enabled on a device.
 * Anything below 68 bytes is rejected.
 */
static inline bool inetdev_valid_mtu(unsigned int mtu)
{
	return !(mtu < 68);
}
1131 
1132 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1133 					struct in_device *in_dev)
1134 
1135 {
1136 	struct in_ifaddr *ifa;
1137 
1138 	for (ifa = in_dev->ifa_list; ifa;
1139 	     ifa = ifa->ifa_next) {
1140 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1141 			 ifa->ifa_local, dev,
1142 			 ifa->ifa_local, NULL,
1143 			 dev->dev_addr, NULL);
1144 	}
1145 }
1146 
1147 /* Called only under RTNL semaphore */
1148 
1149 static int inetdev_event(struct notifier_block *this, unsigned long event,
1150 			 void *ptr)
1151 {
1152 	struct net_device *dev = ptr;
1153 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1154 
1155 	ASSERT_RTNL();
1156 
1157 	if (!in_dev) {
1158 		if (event == NETDEV_REGISTER) {
1159 			in_dev = inetdev_init(dev);
1160 			if (!in_dev)
1161 				return notifier_from_errno(-ENOMEM);
1162 			if (dev->flags & IFF_LOOPBACK) {
1163 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1164 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1165 			}
1166 		} else if (event == NETDEV_CHANGEMTU) {
1167 			/* Re-enabling IP */
1168 			if (inetdev_valid_mtu(dev->mtu))
1169 				in_dev = inetdev_init(dev);
1170 		}
1171 		goto out;
1172 	}
1173 
1174 	switch (event) {
1175 	case NETDEV_REGISTER:
1176 		pr_debug("%s: bug\n", __func__);
1177 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1178 		break;
1179 	case NETDEV_UP:
1180 		if (!inetdev_valid_mtu(dev->mtu))
1181 			break;
1182 		if (dev->flags & IFF_LOOPBACK) {
1183 			struct in_ifaddr *ifa = inet_alloc_ifa();
1184 
1185 			if (ifa) {
1186 				INIT_HLIST_NODE(&ifa->hash);
1187 				ifa->ifa_local =
1188 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1189 				ifa->ifa_prefixlen = 8;
1190 				ifa->ifa_mask = inet_make_mask(8);
1191 				in_dev_hold(in_dev);
1192 				ifa->ifa_dev = in_dev;
1193 				ifa->ifa_scope = RT_SCOPE_HOST;
1194 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1195 				inet_insert_ifa(ifa);
1196 			}
1197 		}
1198 		ip_mc_up(in_dev);
1199 		/* fall through */
1200 	case NETDEV_CHANGEADDR:
1201 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1202 			break;
1203 		/* fall through */
1204 	case NETDEV_NOTIFY_PEERS:
1205 		/* Send gratuitous ARP to notify of link change */
1206 		inetdev_send_gratuitous_arp(dev, in_dev);
1207 		break;
1208 	case NETDEV_DOWN:
1209 		ip_mc_down(in_dev);
1210 		break;
1211 	case NETDEV_PRE_TYPE_CHANGE:
1212 		ip_mc_unmap(in_dev);
1213 		break;
1214 	case NETDEV_POST_TYPE_CHANGE:
1215 		ip_mc_remap(in_dev);
1216 		break;
1217 	case NETDEV_CHANGEMTU:
1218 		if (inetdev_valid_mtu(dev->mtu))
1219 			break;
1220 		/* disable IP when MTU is not enough */
1221 	case NETDEV_UNREGISTER:
1222 		inetdev_destroy(in_dev);
1223 		break;
1224 	case NETDEV_CHANGENAME:
1225 		/* Do not notify about label change, this event is
1226 		 * not interesting to applications using netlink.
1227 		 */
1228 		inetdev_changename(dev, in_dev);
1229 
1230 		devinet_sysctl_unregister(in_dev);
1231 		devinet_sysctl_register(in_dev);
1232 		break;
1233 	}
1234 out:
1235 	return NOTIFY_DONE;
1236 }
1237 
1238 static struct notifier_block ip_netdev_notifier = {
1239 	.notifier_call = inetdev_event,
1240 };
1241 
1242 static inline size_t inet_nlmsg_size(void)
1243 {
1244 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1245 	       + nla_total_size(4) /* IFA_ADDRESS */
1246 	       + nla_total_size(4) /* IFA_LOCAL */
1247 	       + nla_total_size(4) /* IFA_BROADCAST */
1248 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1249 }
1250 
1251 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1252 			    u32 pid, u32 seq, int event, unsigned int flags)
1253 {
1254 	struct ifaddrmsg *ifm;
1255 	struct nlmsghdr  *nlh;
1256 
1257 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1258 	if (nlh == NULL)
1259 		return -EMSGSIZE;
1260 
1261 	ifm = nlmsg_data(nlh);
1262 	ifm->ifa_family = AF_INET;
1263 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1264 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1265 	ifm->ifa_scope = ifa->ifa_scope;
1266 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1267 
1268 	if ((ifa->ifa_address &&
1269 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1270 	    (ifa->ifa_local &&
1271 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1272 	    (ifa->ifa_broadcast &&
1273 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1274 	    (ifa->ifa_label[0] &&
1275 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1276 		goto nla_put_failure;
1277 
1278 	return nlmsg_end(skb, nlh);
1279 
1280 nla_put_failure:
1281 	nlmsg_cancel(skb, nlh);
1282 	return -EMSGSIZE;
1283 }
1284 
1285 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1286 {
1287 	struct net *net = sock_net(skb->sk);
1288 	int h, s_h;
1289 	int idx, s_idx;
1290 	int ip_idx, s_ip_idx;
1291 	struct net_device *dev;
1292 	struct in_device *in_dev;
1293 	struct in_ifaddr *ifa;
1294 	struct hlist_head *head;
1295 	struct hlist_node *node;
1296 
1297 	s_h = cb->args[0];
1298 	s_idx = idx = cb->args[1];
1299 	s_ip_idx = ip_idx = cb->args[2];
1300 
1301 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1302 		idx = 0;
1303 		head = &net->dev_index_head[h];
1304 		rcu_read_lock();
1305 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1306 			if (idx < s_idx)
1307 				goto cont;
1308 			if (h > s_h || idx > s_idx)
1309 				s_ip_idx = 0;
1310 			in_dev = __in_dev_get_rcu(dev);
1311 			if (!in_dev)
1312 				goto cont;
1313 
1314 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1315 			     ifa = ifa->ifa_next, ip_idx++) {
1316 				if (ip_idx < s_ip_idx)
1317 					continue;
1318 				if (inet_fill_ifaddr(skb, ifa,
1319 					     NETLINK_CB(cb->skb).pid,
1320 					     cb->nlh->nlmsg_seq,
1321 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1322 					rcu_read_unlock();
1323 					goto done;
1324 				}
1325 			}
1326 cont:
1327 			idx++;
1328 		}
1329 		rcu_read_unlock();
1330 	}
1331 
1332 done:
1333 	cb->args[0] = h;
1334 	cb->args[1] = idx;
1335 	cb->args[2] = ip_idx;
1336 
1337 	return skb->len;
1338 }
1339 
1340 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1341 		      u32 pid)
1342 {
1343 	struct sk_buff *skb;
1344 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1345 	int err = -ENOBUFS;
1346 	struct net *net;
1347 
1348 	net = dev_net(ifa->ifa_dev->dev);
1349 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1350 	if (skb == NULL)
1351 		goto errout;
1352 
1353 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1354 	if (err < 0) {
1355 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1356 		WARN_ON(err == -EMSGSIZE);
1357 		kfree_skb(skb);
1358 		goto errout;
1359 	}
1360 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1361 	return;
1362 errout:
1363 	if (err < 0)
1364 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1365 }
1366 
1367 static size_t inet_get_link_af_size(const struct net_device *dev)
1368 {
1369 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1370 
1371 	if (!in_dev)
1372 		return 0;
1373 
1374 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1375 }
1376 
1377 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1378 {
1379 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1380 	struct nlattr *nla;
1381 	int i;
1382 
1383 	if (!in_dev)
1384 		return -ENODATA;
1385 
1386 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1387 	if (nla == NULL)
1388 		return -EMSGSIZE;
1389 
1390 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1391 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1392 
1393 	return 0;
1394 }
1395 
1396 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1397 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1398 };
1399 
1400 static int inet_validate_link_af(const struct net_device *dev,
1401 				 const struct nlattr *nla)
1402 {
1403 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1404 	int err, rem;
1405 
1406 	if (dev && !__in_dev_get_rtnl(dev))
1407 		return -EAFNOSUPPORT;
1408 
1409 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1410 	if (err < 0)
1411 		return err;
1412 
1413 	if (tb[IFLA_INET_CONF]) {
1414 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1415 			int cfgid = nla_type(a);
1416 
1417 			if (nla_len(a) < 4)
1418 				return -EINVAL;
1419 
1420 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1421 				return -EINVAL;
1422 		}
1423 	}
1424 
1425 	return 0;
1426 }
1427 
1428 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1429 {
1430 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1431 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1432 	int rem;
1433 
1434 	if (!in_dev)
1435 		return -EAFNOSUPPORT;
1436 
1437 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1438 		BUG();
1439 
1440 	if (tb[IFLA_INET_CONF]) {
1441 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1442 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1443 	}
1444 
1445 	return 0;
1446 }
1447 
1448 #ifdef CONFIG_SYSCTL
1449 
1450 static void devinet_copy_dflt_conf(struct net *net, int i)
1451 {
1452 	struct net_device *dev;
1453 
1454 	rcu_read_lock();
1455 	for_each_netdev_rcu(net, dev) {
1456 		struct in_device *in_dev;
1457 
1458 		in_dev = __in_dev_get_rcu(dev);
1459 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1460 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1461 	}
1462 	rcu_read_unlock();
1463 }
1464 
1465 /* called with RTNL locked */
1466 static void inet_forward_change(struct net *net)
1467 {
1468 	struct net_device *dev;
1469 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1470 
1471 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1472 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1473 
1474 	for_each_netdev(net, dev) {
1475 		struct in_device *in_dev;
1476 		if (on)
1477 			dev_disable_lro(dev);
1478 		rcu_read_lock();
1479 		in_dev = __in_dev_get_rcu(dev);
1480 		if (in_dev)
1481 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1482 		rcu_read_unlock();
1483 	}
1484 }
1485 
1486 static int devinet_conf_proc(ctl_table *ctl, int write,
1487 			     void __user *buffer,
1488 			     size_t *lenp, loff_t *ppos)
1489 {
1490 	int old_value = *(int *)ctl->data;
1491 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1492 	int new_value = *(int *)ctl->data;
1493 
1494 	if (write) {
1495 		struct ipv4_devconf *cnf = ctl->extra1;
1496 		struct net *net = ctl->extra2;
1497 		int i = (int *)ctl->data - cnf->data;
1498 
1499 		set_bit(i, cnf->state);
1500 
1501 		if (cnf == net->ipv4.devconf_dflt)
1502 			devinet_copy_dflt_conf(net, i);
1503 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1504 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1505 			if ((new_value == 0) && (old_value != 0))
1506 				rt_cache_flush(net, 0);
1507 	}
1508 
1509 	return ret;
1510 }
1511 
1512 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1513 				  void __user *buffer,
1514 				  size_t *lenp, loff_t *ppos)
1515 {
1516 	int *valp = ctl->data;
1517 	int val = *valp;
1518 	loff_t pos = *ppos;
1519 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1520 
1521 	if (write && *valp != val) {
1522 		struct net *net = ctl->extra2;
1523 
1524 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1525 			if (!rtnl_trylock()) {
1526 				/* Restore the original values before restarting */
1527 				*valp = val;
1528 				*ppos = pos;
1529 				return restart_syscall();
1530 			}
1531 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1532 				inet_forward_change(net);
1533 			} else if (*valp) {
1534 				struct ipv4_devconf *cnf = ctl->extra1;
1535 				struct in_device *idev =
1536 					container_of(cnf, struct in_device, cnf);
1537 				dev_disable_lro(idev->dev);
1538 			}
1539 			rtnl_unlock();
1540 			rt_cache_flush(net, 0);
1541 		}
1542 	}
1543 
1544 	return ret;
1545 }
1546 
1547 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1548 				void __user *buffer,
1549 				size_t *lenp, loff_t *ppos)
1550 {
1551 	int *valp = ctl->data;
1552 	int val = *valp;
1553 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1554 	struct net *net = ctl->extra2;
1555 
1556 	if (write && *valp != val)
1557 		rt_cache_flush(net, 0);
1558 
1559 	return ret;
1560 }
1561 
1562 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1563 	{ \
1564 		.procname	= name, \
1565 		.data		= ipv4_devconf.data + \
1566 				  IPV4_DEVCONF_ ## attr - 1, \
1567 		.maxlen		= sizeof(int), \
1568 		.mode		= mval, \
1569 		.proc_handler	= proc, \
1570 		.extra1		= &ipv4_devconf, \
1571 	}
1572 
1573 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1574 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1575 
1576 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1577 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1578 
1579 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1580 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1581 
1582 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1583 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1584 
1585 static struct devinet_sysctl_table {
1586 	struct ctl_table_header *sysctl_header;
1587 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1588 } devinet_sysctl = {
1589 	.devinet_vars = {
1590 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1591 					     devinet_sysctl_forward),
1592 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1593 
1594 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1595 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1596 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1597 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1598 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1599 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1600 					"accept_source_route"),
1601 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1602 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1603 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1604 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1605 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1606 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1607 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1608 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1609 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1610 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1611 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1612 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1613 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1614 
1615 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1616 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1617 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1618 					      "force_igmp_version"),
1619 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1620 					      "promote_secondaries"),
1621 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1622 					      "route_localnet"),
1623 	},
1624 };
1625 
1626 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1627 					struct ipv4_devconf *p)
1628 {
1629 	int i;
1630 	struct devinet_sysctl_table *t;
1631 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1632 
1633 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1634 	if (!t)
1635 		goto out;
1636 
1637 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1638 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1639 		t->devinet_vars[i].extra1 = p;
1640 		t->devinet_vars[i].extra2 = net;
1641 	}
1642 
1643 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1644 
1645 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1646 	if (!t->sysctl_header)
1647 		goto free;
1648 
1649 	p->sysctl = t;
1650 	return 0;
1651 
1652 free:
1653 	kfree(t);
1654 out:
1655 	return -ENOBUFS;
1656 }
1657 
1658 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1659 {
1660 	struct devinet_sysctl_table *t = cnf->sysctl;
1661 
1662 	if (t == NULL)
1663 		return;
1664 
1665 	cnf->sysctl = NULL;
1666 	unregister_net_sysctl_table(t->sysctl_header);
1667 	kfree(t);
1668 }
1669 
1670 static void devinet_sysctl_register(struct in_device *idev)
1671 {
1672 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1673 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1674 					&idev->cnf);
1675 }
1676 
1677 static void devinet_sysctl_unregister(struct in_device *idev)
1678 {
1679 	__devinet_sysctl_unregister(&idev->cnf);
1680 	neigh_sysctl_unregister(idev->arp_parms);
1681 }
1682 
1683 static struct ctl_table ctl_forward_entry[] = {
1684 	{
1685 		.procname	= "ip_forward",
1686 		.data		= &ipv4_devconf.data[
1687 					IPV4_DEVCONF_FORWARDING - 1],
1688 		.maxlen		= sizeof(int),
1689 		.mode		= 0644,
1690 		.proc_handler	= devinet_sysctl_forward,
1691 		.extra1		= &ipv4_devconf,
1692 		.extra2		= &init_net,
1693 	},
1694 	{ },
1695 };
1696 #endif
1697 
1698 static __net_init int devinet_init_net(struct net *net)
1699 {
1700 	int err;
1701 	struct ipv4_devconf *all, *dflt;
1702 #ifdef CONFIG_SYSCTL
1703 	struct ctl_table *tbl = ctl_forward_entry;
1704 	struct ctl_table_header *forw_hdr;
1705 #endif
1706 
1707 	err = -ENOMEM;
1708 	all = &ipv4_devconf;
1709 	dflt = &ipv4_devconf_dflt;
1710 
1711 	if (!net_eq(net, &init_net)) {
1712 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1713 		if (all == NULL)
1714 			goto err_alloc_all;
1715 
1716 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1717 		if (dflt == NULL)
1718 			goto err_alloc_dflt;
1719 
1720 #ifdef CONFIG_SYSCTL
1721 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1722 		if (tbl == NULL)
1723 			goto err_alloc_ctl;
1724 
1725 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1726 		tbl[0].extra1 = all;
1727 		tbl[0].extra2 = net;
1728 #endif
1729 	}
1730 
1731 #ifdef CONFIG_SYSCTL
1732 	err = __devinet_sysctl_register(net, "all", all);
1733 	if (err < 0)
1734 		goto err_reg_all;
1735 
1736 	err = __devinet_sysctl_register(net, "default", dflt);
1737 	if (err < 0)
1738 		goto err_reg_dflt;
1739 
1740 	err = -ENOMEM;
1741 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1742 	if (forw_hdr == NULL)
1743 		goto err_reg_ctl;
1744 	net->ipv4.forw_hdr = forw_hdr;
1745 #endif
1746 
1747 	net->ipv4.devconf_all = all;
1748 	net->ipv4.devconf_dflt = dflt;
1749 	return 0;
1750 
1751 #ifdef CONFIG_SYSCTL
1752 err_reg_ctl:
1753 	__devinet_sysctl_unregister(dflt);
1754 err_reg_dflt:
1755 	__devinet_sysctl_unregister(all);
1756 err_reg_all:
1757 	if (tbl != ctl_forward_entry)
1758 		kfree(tbl);
1759 err_alloc_ctl:
1760 #endif
1761 	if (dflt != &ipv4_devconf_dflt)
1762 		kfree(dflt);
1763 err_alloc_dflt:
1764 	if (all != &ipv4_devconf)
1765 		kfree(all);
1766 err_alloc_all:
1767 	return err;
1768 }
1769 
1770 static __net_exit void devinet_exit_net(struct net *net)
1771 {
1772 #ifdef CONFIG_SYSCTL
1773 	struct ctl_table *tbl;
1774 
1775 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1776 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1777 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1778 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1779 	kfree(tbl);
1780 #endif
1781 	kfree(net->ipv4.devconf_dflt);
1782 	kfree(net->ipv4.devconf_all);
1783 }
1784 
1785 static __net_initdata struct pernet_operations devinet_ops = {
1786 	.init = devinet_init_net,
1787 	.exit = devinet_exit_net,
1788 };
1789 
1790 static struct rtnl_af_ops inet_af_ops = {
1791 	.family		  = AF_INET,
1792 	.fill_link_af	  = inet_fill_link_af,
1793 	.get_link_af_size = inet_get_link_af_size,
1794 	.validate_link_af = inet_validate_link_af,
1795 	.set_link_af	  = inet_set_link_af,
1796 };
1797 
1798 void __init devinet_init(void)
1799 {
1800 	int i;
1801 
1802 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1803 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1804 
1805 	register_pernet_subsys(&devinet_ops);
1806 
1807 	register_gifconf(PF_INET, inet_gifconf);
1808 	register_netdevice_notifier(&ip_netdev_notifier);
1809 
1810 	rtnl_af_register(&inet_af_ops);
1811 
1812 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1813 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1814 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1815 }
1816 
1817