xref: /openbmc/linux/net/core/neighbour.c (revision a86854d0)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43 
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)		\
47 do {						\
48 	if (level <= NEIGH_DEBUG)		\
49 		pr_debug(fmt, ##__VA_ARGS__);	\
50 } while (0)
51 
52 #define PNEIGH_HASHMASK		0xF
53 
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 			   u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59 				    struct net_device *dev);
60 
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64 
65 /*
66    Neighbour hash table buckets are protected with rwlock tbl->lock.
67 
68    - All the scans/updates to hash buckets MUST be made under this lock.
69    - NOTHING clever should be done under this lock: no callbacks
70      to protocol backends, no attempts to send anything to the network.
71      Doing so will deadlock if the backend/driver wants to use the
72      neighbour cache.
73    - If the entry requires some non-trivial actions, increase
74      its reference count and release the table lock.
75 
76    Neighbour entries are protected:
77    - with reference count.
78    - with rwlock neigh->lock
79 
80    Reference count prevents destruction.
81 
82    neigh->lock mainly serializes ll address data and its validity state.
83    However, the same lock is used to protect other entry fields:
84     - timer
85     - resolution queue
86 
87    Again, nothing clever shall be done under neigh->lock;
88    the most complicated procedure we allow is dev->hard_header.
89    It is assumed that dev->hard_header is simple and does
90    not call back into the neighbour tables.
91  */
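
/*
 * A minimal sketch of the rules above. slow_work() is a hypothetical
 * operation (transmit, backend callback, ...) that must never run
 * under tbl->lock:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <entry found in its hash bucket>;
 *	neigh_hold(n);			pin the entry
 *	write_unlock_bh(&tbl->lock);	drop the lock before calling out
 *	slow_work(n);
 *	neigh_release(n);		unpin; may free via neigh_destroy()
 */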
92 
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
94 {
95 	kfree_skb(skb);
96 	return -ENETDOWN;
97 }
98 
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 {
101 	if (neigh->parms->neigh_cleanup)
102 		neigh->parms->neigh_cleanup(neigh);
103 
104 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
105 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
106 	neigh_release(neigh);
107 }
108 
109 /*
110  * It is a random distribution in the interval (1/2)*base...(3/2)*base.
111  * It corresponds to the default IPv6 settings and is not overridable,
112  * because it is a really reasonable choice.
113  */
114 
115 unsigned long neigh_rand_reach_time(unsigned long base)
116 {
117 	return base ? (prandom_u32() % base) + (base >> 1) : 0;
118 }
119 EXPORT_SYMBOL(neigh_rand_reach_time);
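
/* Worked example (numbers assumed): with base = 30 * HZ,
 * prandom_u32() % base is uniform over [0, 30s) and base >> 1 adds a
 * fixed 15s, so reachable_time is uniform over [15s, 45s), i.e. the
 * (1/2)*base ... (3/2)*base interval described above.
 */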
120 
121 
122 static bool neigh_del(struct neighbour *n, __u8 state,
123 		      struct neighbour __rcu **np, struct neigh_table *tbl)
124 {
125 	bool retval = false;
126 
127 	write_lock(&n->lock);
128 	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
129 		struct neighbour *neigh;
130 
131 		neigh = rcu_dereference_protected(n->next,
132 						  lockdep_is_held(&tbl->lock));
133 		rcu_assign_pointer(*np, neigh);
134 		n->dead = 1;
135 		retval = true;
136 	}
137 	write_unlock(&n->lock);
138 	if (retval)
139 		neigh_cleanup_and_release(n);
140 	return retval;
141 }
142 
143 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
144 {
145 	struct neigh_hash_table *nht;
146 	void *pkey = ndel->primary_key;
147 	u32 hash_val;
148 	struct neighbour *n;
149 	struct neighbour __rcu **np;
150 
151 	nht = rcu_dereference_protected(tbl->nht,
152 					lockdep_is_held(&tbl->lock));
153 	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
154 	hash_val = hash_val >> (32 - nht->hash_shift);
155 
156 	np = &nht->hash_buckets[hash_val];
157 	while ((n = rcu_dereference_protected(*np,
158 					      lockdep_is_held(&tbl->lock)))) {
159 		if (n == ndel)
160 			return neigh_del(n, 0, np, tbl);
161 		np = &n->next;
162 	}
163 	return false;
164 }
165 
166 static int neigh_forced_gc(struct neigh_table *tbl)
167 {
168 	int shrunk = 0;
169 	int i;
170 	struct neigh_hash_table *nht;
171 
172 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
173 
174 	write_lock_bh(&tbl->lock);
175 	nht = rcu_dereference_protected(tbl->nht,
176 					lockdep_is_held(&tbl->lock));
177 	for (i = 0; i < (1 << nht->hash_shift); i++) {
178 		struct neighbour *n;
179 		struct neighbour __rcu **np;
180 
181 		np = &nht->hash_buckets[i];
182 		while ((n = rcu_dereference_protected(*np,
183 					lockdep_is_held(&tbl->lock))) != NULL) {
184 			/* Neighbour record may be discarded if:
185 			 * - nobody refers to it.
186 			 * - it is not permanent
187 			 */
188 			if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
189 				shrunk = 1;
190 				continue;
191 			}
192 			np = &n->next;
193 		}
194 	}
195 
196 	tbl->last_flush = jiffies;
197 
198 	write_unlock_bh(&tbl->lock);
199 
200 	return shrunk;
201 }
202 
203 static void neigh_add_timer(struct neighbour *n, unsigned long when)
204 {
205 	neigh_hold(n);
206 	if (unlikely(mod_timer(&n->timer, when))) {
207 		printk("NEIGH: BUG, double timer add, state is %x\n",
208 		       n->nud_state);
209 		dump_stack();
210 	}
211 }
212 
213 static int neigh_del_timer(struct neighbour *n)
214 {
215 	if ((n->nud_state & NUD_IN_TIMER) &&
216 	    del_timer(&n->timer)) {
217 		neigh_release(n);
218 		return 1;
219 	}
220 	return 0;
221 }
222 
223 static void pneigh_queue_purge(struct sk_buff_head *list)
224 {
225 	struct sk_buff *skb;
226 
227 	while ((skb = skb_dequeue(list)) != NULL) {
228 		dev_put(skb->dev);
229 		kfree_skb(skb);
230 	}
231 }
232 
233 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
234 {
235 	int i;
236 	struct neigh_hash_table *nht;
237 
238 	nht = rcu_dereference_protected(tbl->nht,
239 					lockdep_is_held(&tbl->lock));
240 
241 	for (i = 0; i < (1 << nht->hash_shift); i++) {
242 		struct neighbour *n;
243 		struct neighbour __rcu **np = &nht->hash_buckets[i];
244 
245 		while ((n = rcu_dereference_protected(*np,
246 					lockdep_is_held(&tbl->lock))) != NULL) {
247 			if (dev && n->dev != dev) {
248 				np = &n->next;
249 				continue;
250 			}
251 			rcu_assign_pointer(*np,
252 				   rcu_dereference_protected(n->next,
253 						lockdep_is_held(&tbl->lock)));
254 			write_lock(&n->lock);
255 			neigh_del_timer(n);
256 			n->dead = 1;
257 
258 			if (refcount_read(&n->refcnt) != 1) {
259 				/* The most unpleasant situation:
260 				   we must destroy the neighbour entry,
261 				   but someone still uses it.
262 
263 				   Destruction will be delayed until
264 				   the last user releases us, but
265 				   we must kill timers etc. and move
266 				   it to a safe state.
267 				 */
268 				__skb_queue_purge(&n->arp_queue);
269 				n->arp_queue_len_bytes = 0;
270 				n->output = neigh_blackhole;
271 				if (n->nud_state & NUD_VALID)
272 					n->nud_state = NUD_NOARP;
273 				else
274 					n->nud_state = NUD_NONE;
275 				neigh_dbg(2, "neigh %p is stray\n", n);
276 			}
277 			write_unlock(&n->lock);
278 			neigh_cleanup_and_release(n);
279 		}
280 	}
281 }
282 
283 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
284 {
285 	write_lock_bh(&tbl->lock);
286 	neigh_flush_dev(tbl, dev);
287 	write_unlock_bh(&tbl->lock);
288 }
289 EXPORT_SYMBOL(neigh_changeaddr);
290 
291 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
292 {
293 	write_lock_bh(&tbl->lock);
294 	neigh_flush_dev(tbl, dev);
295 	pneigh_ifdown_and_unlock(tbl, dev);
296 
297 	del_timer_sync(&tbl->proxy_timer);
298 	pneigh_queue_purge(&tbl->proxy_queue);
299 	return 0;
300 }
301 EXPORT_SYMBOL(neigh_ifdown);
302 
303 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
304 {
305 	struct neighbour *n = NULL;
306 	unsigned long now = jiffies;
307 	int entries;
308 
309 	entries = atomic_inc_return(&tbl->entries) - 1;
310 	if (entries >= tbl->gc_thresh3 ||
311 	    (entries >= tbl->gc_thresh2 &&
312 	     time_after(now, tbl->last_flush + 5 * HZ))) {
313 		if (!neigh_forced_gc(tbl) &&
314 		    entries >= tbl->gc_thresh3) {
315 			net_info_ratelimited("%s: neighbor table overflow!\n",
316 					     tbl->id);
317 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
318 			goto out_entries;
319 		}
320 	}
321 
322 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
323 	if (!n)
324 		goto out_entries;
325 
326 	__skb_queue_head_init(&n->arp_queue);
327 	rwlock_init(&n->lock);
328 	seqlock_init(&n->ha_lock);
329 	n->updated	  = n->used = now;
330 	n->nud_state	  = NUD_NONE;
331 	n->output	  = neigh_blackhole;
332 	seqlock_init(&n->hh.hh_lock);
333 	n->parms	  = neigh_parms_clone(&tbl->parms);
334 	timer_setup(&n->timer, neigh_timer_handler, 0);
335 
336 	NEIGH_CACHE_STAT_INC(tbl, allocs);
337 	n->tbl		  = tbl;
338 	refcount_set(&n->refcnt, 1);
339 	n->dead		  = 1;
340 out:
341 	return n;
342 
343 out_entries:
344 	atomic_dec(&tbl->entries);
345 	goto out;
346 }
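
/* Illustrative numbers (the ARP defaults are gc_thresh1 = 128,
 * gc_thresh2 = 512, gc_thresh3 = 1024): below gc_thresh1 the periodic
 * GC leaves the table alone; past gc_thresh2 an allocation triggers a
 * synchronous neigh_forced_gc() if none ran within the last 5 seconds;
 * at gc_thresh3 the allocation fails with "neighbor table overflow!"
 * unless forced GC manages to free an entry.
 */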
347 
348 static void neigh_get_hash_rnd(u32 *x)
349 {
350 	*x = get_random_u32() | 1;
351 }
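
/* Our reading of the "| 1" above: it keeps hash_rnd odd and non-zero.
 * The per-protocol hash functions multiply by hash_rnd, and an odd
 * multiplier is invertible mod 2^32, so the multiplication alone can
 * never collapse distinct keys together.
 */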
352 
353 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
354 {
355 	size_t size = (1 << shift) * sizeof(struct neighbour *);
356 	struct neigh_hash_table *ret;
357 	struct neighbour __rcu **buckets;
358 	int i;
359 
360 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
361 	if (!ret)
362 		return NULL;
363 	if (size <= PAGE_SIZE)
364 		buckets = kzalloc(size, GFP_ATOMIC);
365 	else
366 		buckets = (struct neighbour __rcu **)
367 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
368 					   get_order(size));
369 	if (!buckets) {
370 		kfree(ret);
371 		return NULL;
372 	}
373 	ret->hash_buckets = buckets;
374 	ret->hash_shift = shift;
375 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
376 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
377 	return ret;
378 }
379 
380 static void neigh_hash_free_rcu(struct rcu_head *head)
381 {
382 	struct neigh_hash_table *nht = container_of(head,
383 						    struct neigh_hash_table,
384 						    rcu);
385 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
386 	struct neighbour __rcu **buckets = nht->hash_buckets;
387 
388 	if (size <= PAGE_SIZE)
389 		kfree(buckets);
390 	else
391 		free_pages((unsigned long)buckets, get_order(size));
392 	kfree(nht);
393 }
394 
395 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
396 						unsigned long new_shift)
397 {
398 	unsigned int i, hash;
399 	struct neigh_hash_table *new_nht, *old_nht;
400 
401 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
402 
403 	old_nht = rcu_dereference_protected(tbl->nht,
404 					    lockdep_is_held(&tbl->lock));
405 	new_nht = neigh_hash_alloc(new_shift);
406 	if (!new_nht)
407 		return old_nht;
408 
409 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
410 		struct neighbour *n, *next;
411 
412 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
413 						   lockdep_is_held(&tbl->lock));
414 		     n != NULL;
415 		     n = next) {
416 			hash = tbl->hash(n->primary_key, n->dev,
417 					 new_nht->hash_rnd);
418 
419 			hash >>= (32 - new_nht->hash_shift);
420 			next = rcu_dereference_protected(n->next,
421 						lockdep_is_held(&tbl->lock));
422 
423 			rcu_assign_pointer(n->next,
424 					   rcu_dereference_protected(
425 						new_nht->hash_buckets[hash],
426 						lockdep_is_held(&tbl->lock)));
427 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
428 		}
429 	}
430 
431 	rcu_assign_pointer(tbl->nht, new_nht);
432 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
433 	return new_nht;
434 }
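
/* Note on the ">> (32 - hash_shift)" pattern used here and in the
 * lookup paths: tbl->hash() returns a full 32-bit value whose high
 * bits are typically the best mixed, so the bucket index is taken
 * from the top hash_shift bits rather than the bottom ones.
 */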
435 
436 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
437 			       struct net_device *dev)
438 {
439 	struct neighbour *n;
440 
441 	NEIGH_CACHE_STAT_INC(tbl, lookups);
442 
443 	rcu_read_lock_bh();
444 	n = __neigh_lookup_noref(tbl, pkey, dev);
445 	if (n) {
446 		if (!refcount_inc_not_zero(&n->refcnt))
447 			n = NULL;
448 		NEIGH_CACHE_STAT_INC(tbl, hits);
449 	}
450 
451 	rcu_read_unlock_bh();
452 	return n;
453 }
454 EXPORT_SYMBOL(neigh_lookup);
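
/* A minimal usage sketch (IPv4 shown, error handling trimmed; "ip" is
 * an illustrative __be32 key):
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &ip, dev);
 *
 *	if (n) {
 *		... inspect n->nud_state / n->ha ...
 *		neigh_release(n);	neigh_lookup() took a reference
 *	}
 */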
455 
456 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
457 				     const void *pkey)
458 {
459 	struct neighbour *n;
460 	unsigned int key_len = tbl->key_len;
461 	u32 hash_val;
462 	struct neigh_hash_table *nht;
463 
464 	NEIGH_CACHE_STAT_INC(tbl, lookups);
465 
466 	rcu_read_lock_bh();
467 	nht = rcu_dereference_bh(tbl->nht);
468 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
469 
470 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
471 	     n != NULL;
472 	     n = rcu_dereference_bh(n->next)) {
473 		if (!memcmp(n->primary_key, pkey, key_len) &&
474 		    net_eq(dev_net(n->dev), net)) {
475 			if (!refcount_inc_not_zero(&n->refcnt))
476 				n = NULL;
477 			NEIGH_CACHE_STAT_INC(tbl, hits);
478 			break;
479 		}
480 	}
481 
482 	rcu_read_unlock_bh();
483 	return n;
484 }
485 EXPORT_SYMBOL(neigh_lookup_nodev);
486 
487 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
488 				 struct net_device *dev, bool want_ref)
489 {
490 	u32 hash_val;
491 	unsigned int key_len = tbl->key_len;
492 	int error;
493 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
494 	struct neigh_hash_table *nht;
495 
496 	if (!n) {
497 		rc = ERR_PTR(-ENOBUFS);
498 		goto out;
499 	}
500 
501 	memcpy(n->primary_key, pkey, key_len);
502 	n->dev = dev;
503 	dev_hold(dev);
504 
505 	/* Protocol specific setup. */
506 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
507 		rc = ERR_PTR(error);
508 		goto out_neigh_release;
509 	}
510 
511 	if (dev->netdev_ops->ndo_neigh_construct) {
512 		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
513 		if (error < 0) {
514 			rc = ERR_PTR(error);
515 			goto out_neigh_release;
516 		}
517 	}
518 
519 	/* Device specific setup. */
520 	if (n->parms->neigh_setup &&
521 	    (error = n->parms->neigh_setup(n)) < 0) {
522 		rc = ERR_PTR(error);
523 		goto out_neigh_release;
524 	}
525 
526 	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
527 
528 	write_lock_bh(&tbl->lock);
529 	nht = rcu_dereference_protected(tbl->nht,
530 					lockdep_is_held(&tbl->lock));
531 
532 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
533 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
534 
535 	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
536 
537 	if (n->parms->dead) {
538 		rc = ERR_PTR(-EINVAL);
539 		goto out_tbl_unlock;
540 	}
541 
542 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
543 					    lockdep_is_held(&tbl->lock));
544 	     n1 != NULL;
545 	     n1 = rcu_dereference_protected(n1->next,
546 			lockdep_is_held(&tbl->lock))) {
547 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
548 			if (want_ref)
549 				neigh_hold(n1);
550 			rc = n1;
551 			goto out_tbl_unlock;
552 		}
553 	}
554 
555 	n->dead = 0;
556 	if (want_ref)
557 		neigh_hold(n);
558 	rcu_assign_pointer(n->next,
559 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
560 						     lockdep_is_held(&tbl->lock)));
561 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
562 	write_unlock_bh(&tbl->lock);
563 	neigh_dbg(2, "neigh %p is created\n", n);
564 	rc = n;
565 out:
566 	return rc;
567 out_tbl_unlock:
568 	write_unlock_bh(&tbl->lock);
569 out_neigh_release:
570 	neigh_release(n);
571 	goto out;
572 }
573 EXPORT_SYMBOL(__neigh_create);
574 
575 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
576 {
577 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
578 	hash_val ^= (hash_val >> 16);
579 	hash_val ^= hash_val >> 8;
580 	hash_val ^= hash_val >> 4;
581 	hash_val &= PNEIGH_HASHMASK;
582 	return hash_val;
583 }
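
/* Worked example of the fold above: the hash starts from the last
 * 32 bits of the key (the address itself when key_len == 4); the
 * xor-shifts by 16, 8 and 4 fold the upper bits down into the low
 * nibble, and masking with PNEIGH_HASHMASK (0xF) leaves a bucket
 * index in 0..15.
 */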
584 
585 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
586 					      struct net *net,
587 					      const void *pkey,
588 					      unsigned int key_len,
589 					      struct net_device *dev)
590 {
591 	while (n) {
592 		if (!memcmp(n->key, pkey, key_len) &&
593 		    net_eq(pneigh_net(n), net) &&
594 		    (n->dev == dev || !n->dev))
595 			return n;
596 		n = n->next;
597 	}
598 	return NULL;
599 }
600 
601 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
602 		struct net *net, const void *pkey, struct net_device *dev)
603 {
604 	unsigned int key_len = tbl->key_len;
605 	u32 hash_val = pneigh_hash(pkey, key_len);
606 
607 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
608 				 net, pkey, key_len, dev);
609 }
610 EXPORT_SYMBOL_GPL(__pneigh_lookup);
611 
612 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
613 				    struct net *net, const void *pkey,
614 				    struct net_device *dev, int creat)
615 {
616 	struct pneigh_entry *n;
617 	unsigned int key_len = tbl->key_len;
618 	u32 hash_val = pneigh_hash(pkey, key_len);
619 
620 	read_lock_bh(&tbl->lock);
621 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
622 			      net, pkey, key_len, dev);
623 	read_unlock_bh(&tbl->lock);
624 
625 	if (n || !creat)
626 		goto out;
627 
628 	ASSERT_RTNL();
629 
630 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
631 	if (!n)
632 		goto out;
633 
634 	write_pnet(&n->net, net);
635 	memcpy(n->key, pkey, key_len);
636 	n->dev = dev;
637 	if (dev)
638 		dev_hold(dev);
639 
640 	if (tbl->pconstructor && tbl->pconstructor(n)) {
641 		if (dev)
642 			dev_put(dev);
643 		kfree(n);
644 		n = NULL;
645 		goto out;
646 	}
647 
648 	write_lock_bh(&tbl->lock);
649 	n->next = tbl->phash_buckets[hash_val];
650 	tbl->phash_buckets[hash_val] = n;
651 	write_unlock_bh(&tbl->lock);
652 out:
653 	return n;
654 }
655 EXPORT_SYMBOL(pneigh_lookup);
656 
657 
658 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
659 		  struct net_device *dev)
660 {
661 	struct pneigh_entry *n, **np;
662 	unsigned int key_len = tbl->key_len;
663 	u32 hash_val = pneigh_hash(pkey, key_len);
664 
665 	write_lock_bh(&tbl->lock);
666 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
667 	     np = &n->next) {
668 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
669 		    net_eq(pneigh_net(n), net)) {
670 			*np = n->next;
671 			write_unlock_bh(&tbl->lock);
672 			if (tbl->pdestructor)
673 				tbl->pdestructor(n);
674 			if (n->dev)
675 				dev_put(n->dev);
676 			kfree(n);
677 			return 0;
678 		}
679 	}
680 	write_unlock_bh(&tbl->lock);
681 	return -ENOENT;
682 }
683 
684 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
685 				    struct net_device *dev)
686 {
687 	struct pneigh_entry *n, **np, *freelist = NULL;
688 	u32 h;
689 
690 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
691 		np = &tbl->phash_buckets[h];
692 		while ((n = *np) != NULL) {
693 			if (!dev || n->dev == dev) {
694 				*np = n->next;
695 				n->next = freelist;
696 				freelist = n;
697 				continue;
698 			}
699 			np = &n->next;
700 		}
701 	}
702 	write_unlock_bh(&tbl->lock);
703 	while ((n = freelist)) {
704 		freelist = n->next;
705 		n->next = NULL;
706 		if (tbl->pdestructor)
707 			tbl->pdestructor(n);
708 		if (n->dev)
709 			dev_put(n->dev);
710 		kfree(n);
711 	}
712 	return -ENOENT;
713 }
714 
715 static void neigh_parms_destroy(struct neigh_parms *parms);
716 
717 static inline void neigh_parms_put(struct neigh_parms *parms)
718 {
719 	if (refcount_dec_and_test(&parms->refcnt))
720 		neigh_parms_destroy(parms);
721 }
722 
723 /*
724  *	The neighbour must already be out of the table;
725  *	it is destroyed once the last reference is dropped.
726  */
727 void neigh_destroy(struct neighbour *neigh)
728 {
729 	struct net_device *dev = neigh->dev;
730 
731 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
732 
733 	if (!neigh->dead) {
734 		pr_warn("Destroying alive neighbour %p\n", neigh);
735 		dump_stack();
736 		return;
737 	}
738 
739 	if (neigh_del_timer(neigh))
740 		pr_warn("Impossible event\n");
741 
742 	write_lock_bh(&neigh->lock);
743 	__skb_queue_purge(&neigh->arp_queue);
744 	write_unlock_bh(&neigh->lock);
745 	neigh->arp_queue_len_bytes = 0;
746 
747 	if (dev->netdev_ops->ndo_neigh_destroy)
748 		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
749 
750 	dev_put(dev);
751 	neigh_parms_put(neigh->parms);
752 
753 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
754 
755 	atomic_dec(&neigh->tbl->entries);
756 	kfree_rcu(neigh, rcu);
757 }
758 EXPORT_SYMBOL(neigh_destroy);
759 
760 /* Neighbour state is suspicious;
761    disable fast path.
762 
763    Called with write_locked neigh.
764  */
765 static void neigh_suspect(struct neighbour *neigh)
766 {
767 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
768 
769 	neigh->output = neigh->ops->output;
770 }
771 
772 /* Neighbour state is OK;
773    enable fast path.
774 
775    Called with write_locked neigh.
776  */
777 static void neigh_connect(struct neighbour *neigh)
778 {
779 	neigh_dbg(2, "neigh %p is connected\n", neigh);
780 
781 	neigh->output = neigh->ops->connected_output;
782 }
783 
784 static void neigh_periodic_work(struct work_struct *work)
785 {
786 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
787 	struct neighbour *n;
788 	struct neighbour __rcu **np;
789 	unsigned int i;
790 	struct neigh_hash_table *nht;
791 
792 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
793 
794 	write_lock_bh(&tbl->lock);
795 	nht = rcu_dereference_protected(tbl->nht,
796 					lockdep_is_held(&tbl->lock));
797 
798 	/*
799 	 *	periodically recompute ReachableTime from the random function
800 	 */
801 
802 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
803 		struct neigh_parms *p;
804 		tbl->last_rand = jiffies;
805 		list_for_each_entry(p, &tbl->parms_list, list)
806 			p->reachable_time =
807 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
808 	}
809 
810 	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
811 		goto out;
812 
813 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
814 		np = &nht->hash_buckets[i];
815 
816 		while ((n = rcu_dereference_protected(*np,
817 				lockdep_is_held(&tbl->lock))) != NULL) {
818 			unsigned int state;
819 
820 			write_lock(&n->lock);
821 
822 			state = n->nud_state;
823 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
824 			    (n->flags & NTF_EXT_LEARNED)) {
825 				write_unlock(&n->lock);
826 				goto next_elt;
827 			}
828 
829 			if (time_before(n->used, n->confirmed))
830 				n->used = n->confirmed;
831 
832 			if (refcount_read(&n->refcnt) == 1 &&
833 			    (state == NUD_FAILED ||
834 			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
835 				*np = n->next;
836 				n->dead = 1;
837 				write_unlock(&n->lock);
838 				neigh_cleanup_and_release(n);
839 				continue;
840 			}
841 			write_unlock(&n->lock);
842 
843 next_elt:
844 			np = &n->next;
845 		}
846 		/*
847 		 * It's fine to release the lock here, even if the hash
848 		 * table grows while we are preempted.
849 		 */
850 		write_unlock_bh(&tbl->lock);
851 		cond_resched();
852 		write_lock_bh(&tbl->lock);
853 		nht = rcu_dereference_protected(tbl->nht,
854 						lockdep_is_held(&tbl->lock));
855 	}
856 out:
857 	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
858 	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
859 	 * BASE_REACHABLE_TIME.
860 	 */
861 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
862 			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
863 	write_unlock_bh(&tbl->lock);
864 }
865 
866 static __inline__ int neigh_max_probes(struct neighbour *n)
867 {
868 	struct neigh_parms *p = n->parms;
869 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
870 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
871 	        NEIGH_VAR(p, MCAST_PROBES));
872 }
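
/* Example with the ARP defaults (UCAST_PROBES = 3, APP_PROBES = 0,
 * MCAST_PROBES = 3): outside NUD_PROBE the limit is 3 + 0 + 3 = 6.
 * neigh_timer_handler() compares atomic probes against this limit
 * and declares the entry NUD_FAILED once it is reached.
 */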
873 
874 static void neigh_invalidate(struct neighbour *neigh)
875 	__releases(neigh->lock)
876 	__acquires(neigh->lock)
877 {
878 	struct sk_buff *skb;
879 
880 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
881 	neigh_dbg(2, "neigh %p is failed\n", neigh);
882 	neigh->updated = jiffies;
883 
884 	/* This is a very delicate spot. report_unreachable is a very
885 	   complicated routine. In particular, it can hit the same
886 	   neighbour entry!
887 	   So we try to be careful and avoid a dead loop. --ANK
888 	 */
889 	while (neigh->nud_state == NUD_FAILED &&
890 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
891 		write_unlock(&neigh->lock);
892 		neigh->ops->error_report(neigh, skb);
893 		write_lock(&neigh->lock);
894 	}
895 	__skb_queue_purge(&neigh->arp_queue);
896 	neigh->arp_queue_len_bytes = 0;
897 }
898 
899 static void neigh_probe(struct neighbour *neigh)
900 	__releases(neigh->lock)
901 {
902 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
903 	/* keep skb alive even if arp_queue overflows */
904 	if (skb)
905 		skb = skb_clone(skb, GFP_ATOMIC);
906 	write_unlock(&neigh->lock);
907 	if (neigh->ops->solicit)
908 		neigh->ops->solicit(neigh, skb);
909 	atomic_inc(&neigh->probes);
910 	kfree_skb(skb);
911 }
912 
913 /* Called when a timer expires for a neighbour entry. */
914 
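/* A summary of the timer-driven transitions implemented below,
 * derived from the code:
 *
 *	NUD_REACHABLE  confirmed recently ........ stays NUD_REACHABLE
 *	NUD_REACHABLE  only used recently ........ -> NUD_DELAY
 *	NUD_REACHABLE  otherwise ................. -> NUD_STALE
 *	NUD_DELAY      confirmed in time ......... -> NUD_REACHABLE
 *	NUD_DELAY      otherwise ................. -> NUD_PROBE
 *	NUD_PROBE / NUD_INCOMPLETE  out of probes  -> NUD_FAILED
 */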
915 static void neigh_timer_handler(struct timer_list *t)
916 {
917 	unsigned long now, next;
918 	struct neighbour *neigh = from_timer(neigh, t, timer);
919 	unsigned int state;
920 	int notify = 0;
921 
922 	write_lock(&neigh->lock);
923 
924 	state = neigh->nud_state;
925 	now = jiffies;
926 	next = now + HZ;
927 
928 	if (!(state & NUD_IN_TIMER))
929 		goto out;
930 
931 	if (state & NUD_REACHABLE) {
932 		if (time_before_eq(now,
933 				   neigh->confirmed + neigh->parms->reachable_time)) {
934 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
935 			next = neigh->confirmed + neigh->parms->reachable_time;
936 		} else if (time_before_eq(now,
937 					  neigh->used +
938 					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
939 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
940 			neigh->nud_state = NUD_DELAY;
941 			neigh->updated = jiffies;
942 			neigh_suspect(neigh);
943 			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
944 		} else {
945 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
946 			neigh->nud_state = NUD_STALE;
947 			neigh->updated = jiffies;
948 			neigh_suspect(neigh);
949 			notify = 1;
950 		}
951 	} else if (state & NUD_DELAY) {
952 		if (time_before_eq(now,
953 				   neigh->confirmed +
954 				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
955 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
956 			neigh->nud_state = NUD_REACHABLE;
957 			neigh->updated = jiffies;
958 			neigh_connect(neigh);
959 			notify = 1;
960 			next = neigh->confirmed + neigh->parms->reachable_time;
961 		} else {
962 			neigh_dbg(2, "neigh %p is probed\n", neigh);
963 			neigh->nud_state = NUD_PROBE;
964 			neigh->updated = jiffies;
965 			atomic_set(&neigh->probes, 0);
966 			notify = 1;
967 			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
968 		}
969 	} else {
970 		/* NUD_PROBE|NUD_INCOMPLETE */
971 		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
972 	}
973 
974 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
975 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
976 		neigh->nud_state = NUD_FAILED;
977 		notify = 1;
978 		neigh_invalidate(neigh);
979 		goto out;
980 	}
981 
982 	if (neigh->nud_state & NUD_IN_TIMER) {
983 		if (time_before(next, jiffies + HZ/2))
984 			next = jiffies + HZ/2;
985 		if (!mod_timer(&neigh->timer, next))
986 			neigh_hold(neigh);
987 	}
988 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
989 		neigh_probe(neigh);
990 	} else {
991 out:
992 		write_unlock(&neigh->lock);
993 	}
994 
995 	if (notify)
996 		neigh_update_notify(neigh, 0);
997 
998 	neigh_release(neigh);
999 }
1000 
1001 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1002 {
1003 	int rc;
1004 	bool immediate_probe = false;
1005 
1006 	write_lock_bh(&neigh->lock);
1007 
1008 	rc = 0;
1009 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1010 		goto out_unlock_bh;
1011 	if (neigh->dead)
1012 		goto out_dead;
1013 
1014 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1015 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1016 		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
1017 			unsigned long next, now = jiffies;
1018 
1019 			atomic_set(&neigh->probes,
1020 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
1021 			neigh->nud_state     = NUD_INCOMPLETE;
1022 			neigh->updated = now;
1023 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1024 					 HZ/2);
1025 			neigh_add_timer(neigh, next);
1026 			immediate_probe = true;
1027 		} else {
1028 			neigh->nud_state = NUD_FAILED;
1029 			neigh->updated = jiffies;
1030 			write_unlock_bh(&neigh->lock);
1031 
1032 			kfree_skb(skb);
1033 			return 1;
1034 		}
1035 	} else if (neigh->nud_state & NUD_STALE) {
1036 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
1037 		neigh->nud_state = NUD_DELAY;
1038 		neigh->updated = jiffies;
1039 		neigh_add_timer(neigh, jiffies +
1040 				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1041 	}
1042 
1043 	if (neigh->nud_state == NUD_INCOMPLETE) {
1044 		if (skb) {
1045 			while (neigh->arp_queue_len_bytes + skb->truesize >
1046 			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1047 				struct sk_buff *buff;
1048 
1049 				buff = __skb_dequeue(&neigh->arp_queue);
1050 				if (!buff)
1051 					break;
1052 				neigh->arp_queue_len_bytes -= buff->truesize;
1053 				kfree_skb(buff);
1054 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1055 			}
1056 			skb_dst_force(skb);
1057 			__skb_queue_tail(&neigh->arp_queue, skb);
1058 			neigh->arp_queue_len_bytes += skb->truesize;
1059 		}
1060 		rc = 1;
1061 	}
1062 out_unlock_bh:
1063 	if (immediate_probe)
1064 		neigh_probe(neigh);
1065 	else
1066 		write_unlock(&neigh->lock);
1067 	local_bh_enable();
1068 	return rc;
1069 
1070 out_dead:
1071 	if (neigh->nud_state & NUD_STALE)
1072 		goto out_unlock_bh;
1073 	write_unlock_bh(&neigh->lock);
1074 	kfree_skb(skb);
1075 	return 1;
1076 }
1077 EXPORT_SYMBOL(__neigh_event_send);
1078 
1079 static void neigh_update_hhs(struct neighbour *neigh)
1080 {
1081 	struct hh_cache *hh;
1082 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1083 		= NULL;
1084 
1085 	if (neigh->dev->header_ops)
1086 		update = neigh->dev->header_ops->cache_update;
1087 
1088 	if (update) {
1089 		hh = &neigh->hh;
1090 		if (hh->hh_len) {
1091 			write_seqlock_bh(&hh->hh_lock);
1092 			update(hh, neigh->dev, neigh->ha);
1093 			write_sequnlock_bh(&hh->hh_lock);
1094 		}
1095 	}
1096 }
1097 
1098 
1099 
1100 /* Generic update routine.
1101    -- lladdr is the new lladdr, or NULL if it is not supplied.
1102    -- new    is the new state.
1103    -- flags
1104 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1105 				if it is different.
1106 	NEIGH_UPDATE_F_WEAK_OVERRIDE marks an existing "connected"
1107 				lladdr as suspect instead of overriding
1108 				it if it is different.
1109 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1110 
1111 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
1112 				NTF_ROUTER flag.
1113 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known
1114 				to be a router.
1115 
1116    Caller MUST hold a reference count on the entry.
1117  */
1118 
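/* A sketch of a typical caller, modelled on the ARP receive path
 * (sip/sha stand for the sender's IP and MAC taken from the packet;
 * error handling trimmed):
 *
 *	n = neigh_lookup(&arp_tbl, &sip, dev);
 *	if (n) {
 *		neigh_update(n, sha, NUD_REACHABLE,
 *			     NEIGH_UPDATE_F_OVERRIDE, 0);
 *		neigh_release(n);	drop the reference we held
 *	}
 */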
1119 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1120 		 u32 flags, u32 nlmsg_pid)
1121 {
1122 	u8 old;
1123 	int err;
1124 	int notify = 0;
1125 	struct net_device *dev;
1126 	int update_isrouter = 0;
1127 
1128 	write_lock_bh(&neigh->lock);
1129 
1130 	dev    = neigh->dev;
1131 	old    = neigh->nud_state;
1132 	err    = -EPERM;
1133 
1134 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1135 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1136 		goto out;
1137 	if (neigh->dead)
1138 		goto out;
1139 
1140 	neigh_update_ext_learned(neigh, flags, &notify);
1141 
1142 	if (!(new & NUD_VALID)) {
1143 		neigh_del_timer(neigh);
1144 		if (old & NUD_CONNECTED)
1145 			neigh_suspect(neigh);
1146 		neigh->nud_state = new;
1147 		err = 0;
1148 		notify = old & NUD_VALID;
1149 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1150 		    (new & NUD_FAILED)) {
1151 			neigh_invalidate(neigh);
1152 			notify = 1;
1153 		}
1154 		goto out;
1155 	}
1156 
1157 	/* Compare new lladdr with cached one */
1158 	if (!dev->addr_len) {
1159 		/* First case: device needs no address. */
1160 		lladdr = neigh->ha;
1161 	} else if (lladdr) {
1162 		/* The second case: if something is already cached
1163 		   and a new address is proposed:
1164 		   - compare new & old
1165 		   - if they are different, check override flag
1166 		 */
1167 		if ((old & NUD_VALID) &&
1168 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1169 			lladdr = neigh->ha;
1170 	} else {
1171 		/* No address is supplied; if we know something,
1172 		   use it, otherwise discard the request.
1173 		 */
1174 		err = -EINVAL;
1175 		if (!(old & NUD_VALID))
1176 			goto out;
1177 		lladdr = neigh->ha;
1178 	}
1179 
1180 	/* If the entry was valid and the address has not changed,
1181 	   do not change the entry state if the new one is STALE.
1182 	 */
1183 	err = 0;
1184 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1185 	if (old & NUD_VALID) {
1186 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1187 			update_isrouter = 0;
1188 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1189 			    (old & NUD_CONNECTED)) {
1190 				lladdr = neigh->ha;
1191 				new = NUD_STALE;
1192 			} else
1193 				goto out;
1194 		} else {
1195 			if (lladdr == neigh->ha && new == NUD_STALE &&
1196 			    !(flags & NEIGH_UPDATE_F_ADMIN))
1197 				new = old;
1198 		}
1199 	}
1200 
1201 	/* Update timestamps only once we know we will make a change to the
1202 	 * neighbour entry. Otherwise we risk moving the locktime window with
1203 	 * no-op updates and ignoring relevant ARP updates.
1204 	 */
1205 	if (new != old || lladdr != neigh->ha) {
1206 		if (new & NUD_CONNECTED)
1207 			neigh->confirmed = jiffies;
1208 		neigh->updated = jiffies;
1209 	}
1210 
1211 	if (new != old) {
1212 		neigh_del_timer(neigh);
1213 		if (new & NUD_PROBE)
1214 			atomic_set(&neigh->probes, 0);
1215 		if (new & NUD_IN_TIMER)
1216 			neigh_add_timer(neigh, (jiffies +
1217 						((new & NUD_REACHABLE) ?
1218 						 neigh->parms->reachable_time :
1219 						 0)));
1220 		neigh->nud_state = new;
1221 		notify = 1;
1222 	}
1223 
1224 	if (lladdr != neigh->ha) {
1225 		write_seqlock(&neigh->ha_lock);
1226 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1227 		write_sequnlock(&neigh->ha_lock);
1228 		neigh_update_hhs(neigh);
1229 		if (!(new & NUD_CONNECTED))
1230 			neigh->confirmed = jiffies -
1231 				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1232 		notify = 1;
1233 	}
1234 	if (new == old)
1235 		goto out;
1236 	if (new & NUD_CONNECTED)
1237 		neigh_connect(neigh);
1238 	else
1239 		neigh_suspect(neigh);
1240 	if (!(old & NUD_VALID)) {
1241 		struct sk_buff *skb;
1242 
1243 		/* Again: avoid a dead loop if something goes wrong */
1244 
1245 		while (neigh->nud_state & NUD_VALID &&
1246 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1247 			struct dst_entry *dst = skb_dst(skb);
1248 			struct neighbour *n2, *n1 = neigh;
1249 			write_unlock_bh(&neigh->lock);
1250 
1251 			rcu_read_lock();
1252 
1253 			/* Why not just use 'neigh' as-is?  The problem is that
1254 			 * things such as shaper, eql, and sch_teql can end up
1255 			 * using alternative, different, neigh objects to output
1256 			 * the packet in the output path.  So what we need to do
1257 			 * here is re-lookup the top-level neigh in the path so
1258 			 * we can reinject the packet there.
1259 			 */
1260 			n2 = NULL;
1261 			if (dst) {
1262 				n2 = dst_neigh_lookup_skb(dst, skb);
1263 				if (n2)
1264 					n1 = n2;
1265 			}
1266 			n1->output(n1, skb);
1267 			if (n2)
1268 				neigh_release(n2);
1269 			rcu_read_unlock();
1270 
1271 			write_lock_bh(&neigh->lock);
1272 		}
1273 		__skb_queue_purge(&neigh->arp_queue);
1274 		neigh->arp_queue_len_bytes = 0;
1275 	}
1276 out:
1277 	if (update_isrouter) {
1278 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1279 			(neigh->flags | NTF_ROUTER) :
1280 			(neigh->flags & ~NTF_ROUTER);
1281 	}
1282 	write_unlock_bh(&neigh->lock);
1283 
1284 	if (notify)
1285 		neigh_update_notify(neigh, nlmsg_pid);
1286 
1287 	return err;
1288 }
1289 EXPORT_SYMBOL(neigh_update);
1290 
1291 /* Update the neigh to listen temporarily for probe responses, even if it is
1292  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1293  */
1294 void __neigh_set_probe_once(struct neighbour *neigh)
1295 {
1296 	if (neigh->dead)
1297 		return;
1298 	neigh->updated = jiffies;
1299 	if (!(neigh->nud_state & NUD_FAILED))
1300 		return;
1301 	neigh->nud_state = NUD_INCOMPLETE;
1302 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1303 	neigh_add_timer(neigh,
1304 			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1305 }
1306 EXPORT_SYMBOL(__neigh_set_probe_once);
1307 
1308 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1309 				 u8 *lladdr, void *saddr,
1310 				 struct net_device *dev)
1311 {
1312 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1313 						 lladdr || !dev->addr_len);
1314 	if (neigh)
1315 		neigh_update(neigh, lladdr, NUD_STALE,
1316 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1317 	return neigh;
1318 }
1319 EXPORT_SYMBOL(neigh_event_ns);
1320 
1321 /* neigh_hh_init() takes n->lock itself; callers must not hold it. */
1322 static void neigh_hh_init(struct neighbour *n)
1323 {
1324 	struct net_device *dev = n->dev;
1325 	__be16 prot = n->tbl->protocol;
1326 	struct hh_cache	*hh = &n->hh;
1327 
1328 	write_lock_bh(&n->lock);
1329 
1330 	/* Only one thread can come in here and initialize the
1331 	 * hh_cache entry.
1332 	 */
1333 	if (!hh->hh_len)
1334 		dev->header_ops->cache(n, hh, prot);
1335 
1336 	write_unlock_bh(&n->lock);
1337 }
1338 
1339 /* Slow and careful. */
1340 
1341 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1342 {
1343 	int rc = 0;
1344 
1345 	if (!neigh_event_send(neigh, skb)) {
1346 		int err;
1347 		struct net_device *dev = neigh->dev;
1348 		unsigned int seq;
1349 
1350 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1351 			neigh_hh_init(neigh);
1352 
1353 		do {
1354 			__skb_pull(skb, skb_network_offset(skb));
1355 			seq = read_seqbegin(&neigh->ha_lock);
1356 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1357 					      neigh->ha, NULL, skb->len);
1358 		} while (read_seqretry(&neigh->ha_lock, seq));
1359 
1360 		if (err >= 0)
1361 			rc = dev_queue_xmit(skb);
1362 		else
1363 			goto out_kfree_skb;
1364 	}
1365 out:
1366 	return rc;
1367 out_kfree_skb:
1368 	rc = -EINVAL;
1369 	kfree_skb(skb);
1370 	goto out;
1371 }
1372 EXPORT_SYMBOL(neigh_resolve_output);
1373 
1374 /* As fast as possible without hh cache */
1375 
1376 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1377 {
1378 	struct net_device *dev = neigh->dev;
1379 	unsigned int seq;
1380 	int err;
1381 
1382 	do {
1383 		__skb_pull(skb, skb_network_offset(skb));
1384 		seq = read_seqbegin(&neigh->ha_lock);
1385 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1386 				      neigh->ha, NULL, skb->len);
1387 	} while (read_seqretry(&neigh->ha_lock, seq));
1388 
1389 	if (err >= 0)
1390 		err = dev_queue_xmit(skb);
1391 	else {
1392 		err = -EINVAL;
1393 		kfree_skb(skb);
1394 	}
1395 	return err;
1396 }
1397 EXPORT_SYMBOL(neigh_connected_output);
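
/* Both output paths above copy neigh->ha under the ha_lock seqlock:
 * if read_seqretry() observes that a writer (neigh_update()) changed
 * the address mid-copy, the header is simply rebuilt with the fresh
 * address. The lock-free retry is cheaper per packet than taking
 * neigh->lock around every transmit.
 */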
1398 
1399 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1400 {
1401 	return dev_queue_xmit(skb);
1402 }
1403 EXPORT_SYMBOL(neigh_direct_output);
1404 
1405 static void neigh_proxy_process(struct timer_list *t)
1406 {
1407 	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1408 	long sched_next = 0;
1409 	unsigned long now = jiffies;
1410 	struct sk_buff *skb, *n;
1411 
1412 	spin_lock(&tbl->proxy_queue.lock);
1413 
1414 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1415 		long tdif = NEIGH_CB(skb)->sched_next - now;
1416 
1417 		if (tdif <= 0) {
1418 			struct net_device *dev = skb->dev;
1419 
1420 			__skb_unlink(skb, &tbl->proxy_queue);
1421 			if (tbl->proxy_redo && netif_running(dev)) {
1422 				rcu_read_lock();
1423 				tbl->proxy_redo(skb);
1424 				rcu_read_unlock();
1425 			} else {
1426 				kfree_skb(skb);
1427 			}
1428 
1429 			dev_put(dev);
1430 		} else if (!sched_next || tdif < sched_next)
1431 			sched_next = tdif;
1432 	}
1433 	del_timer(&tbl->proxy_timer);
1434 	if (sched_next)
1435 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1436 	spin_unlock(&tbl->proxy_queue.lock);
1437 }
1438 
1439 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1440 		    struct sk_buff *skb)
1441 {
1442 	unsigned long now = jiffies;
1443 
1444 	unsigned long sched_next = now + (prandom_u32() %
1445 					  NEIGH_VAR(p, PROXY_DELAY));
1446 
1447 	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1448 		kfree_skb(skb);
1449 		return;
1450 	}
1451 
1452 	NEIGH_CB(skb)->sched_next = sched_next;
1453 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1454 
1455 	spin_lock(&tbl->proxy_queue.lock);
1456 	if (del_timer(&tbl->proxy_timer)) {
1457 		if (time_before(tbl->proxy_timer.expires, sched_next))
1458 			sched_next = tbl->proxy_timer.expires;
1459 	}
1460 	skb_dst_drop(skb);
1461 	dev_hold(skb->dev);
1462 	__skb_queue_tail(&tbl->proxy_queue, skb);
1463 	mod_timer(&tbl->proxy_timer, sched_next);
1464 	spin_unlock(&tbl->proxy_queue.lock);
1465 }
1466 EXPORT_SYMBOL(pneigh_enqueue);
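
/* Worked example (ARP's default PROXY_DELAY is (8 * HZ) / 10, i.e.
 * 0.8 s): each proxied skb is held for a uniformly random 0..0.8 s
 * before proxy_redo() answers it, spreading proxy replies out instead
 * of emitting them in bursts.
 */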
1467 
1468 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1469 						      struct net *net, int ifindex)
1470 {
1471 	struct neigh_parms *p;
1472 
1473 	list_for_each_entry(p, &tbl->parms_list, list) {
1474 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1475 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1476 			return p;
1477 	}
1478 
1479 	return NULL;
1480 }
1481 
1482 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1483 				      struct neigh_table *tbl)
1484 {
1485 	struct neigh_parms *p;
1486 	struct net *net = dev_net(dev);
1487 	const struct net_device_ops *ops = dev->netdev_ops;
1488 
1489 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1490 	if (p) {
1491 		p->tbl		  = tbl;
1492 		refcount_set(&p->refcnt, 1);
1493 		p->reachable_time =
1494 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1495 		dev_hold(dev);
1496 		p->dev = dev;
1497 		write_pnet(&p->net, net);
1498 		p->sysctl_table = NULL;
1499 
1500 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1501 			dev_put(dev);
1502 			kfree(p);
1503 			return NULL;
1504 		}
1505 
1506 		write_lock_bh(&tbl->lock);
1507 		list_add(&p->list, &tbl->parms.list);
1508 		write_unlock_bh(&tbl->lock);
1509 
1510 		neigh_parms_data_state_cleanall(p);
1511 	}
1512 	return p;
1513 }
1514 EXPORT_SYMBOL(neigh_parms_alloc);
1515 
1516 static void neigh_rcu_free_parms(struct rcu_head *head)
1517 {
1518 	struct neigh_parms *parms =
1519 		container_of(head, struct neigh_parms, rcu_head);
1520 
1521 	neigh_parms_put(parms);
1522 }
1523 
1524 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1525 {
1526 	if (!parms || parms == &tbl->parms)
1527 		return;
1528 	write_lock_bh(&tbl->lock);
1529 	list_del(&parms->list);
1530 	parms->dead = 1;
1531 	write_unlock_bh(&tbl->lock);
1532 	if (parms->dev)
1533 		dev_put(parms->dev);
1534 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1535 }
1536 EXPORT_SYMBOL(neigh_parms_release);
1537 
1538 static void neigh_parms_destroy(struct neigh_parms *parms)
1539 {
1540 	kfree(parms);
1541 }
1542 
1543 static struct lock_class_key neigh_table_proxy_queue_class;
1544 
1545 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1546 
1547 void neigh_table_init(int index, struct neigh_table *tbl)
1548 {
1549 	unsigned long now = jiffies;
1550 	unsigned long phsize;
1551 
1552 	INIT_LIST_HEAD(&tbl->parms_list);
1553 	list_add(&tbl->parms.list, &tbl->parms_list);
1554 	write_pnet(&tbl->parms.net, &init_net);
1555 	refcount_set(&tbl->parms.refcnt, 1);
1556 	tbl->parms.reachable_time =
1557 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1558 
1559 	tbl->stats = alloc_percpu(struct neigh_statistics);
1560 	if (!tbl->stats)
1561 		panic("cannot create neighbour cache statistics");
1562 
1563 #ifdef CONFIG_PROC_FS
1564 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1565 			      &neigh_stat_seq_ops, tbl))
1566 		panic("cannot create neighbour proc dir entry");
1567 #endif
1568 
1569 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1570 
1571 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1572 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1573 
1574 	if (!tbl->nht || !tbl->phash_buckets)
1575 		panic("cannot allocate neighbour cache hashes");
1576 
1577 	if (!tbl->entry_size)
1578 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1579 					tbl->key_len, NEIGH_PRIV_ALIGN);
1580 	else
1581 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1582 
1583 	rwlock_init(&tbl->lock);
1584 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1585 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1586 			tbl->parms.reachable_time);
1587 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1588 	skb_queue_head_init_class(&tbl->proxy_queue,
1589 			&neigh_table_proxy_queue_class);
1590 
1591 	tbl->last_flush = now;
1592 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1593 
1594 	neigh_tables[index] = tbl;
1595 }
1596 EXPORT_SYMBOL(neigh_table_init);
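
/* A minimal registration sketch, mirroring what net/ipv4/arp.c does
 * (fields trimmed to the ones this file relies on; the my_* names
 * are illustrative):
 *
 *	static struct neigh_table my_tbl = {
 *		.family		= AF_INET,
 *		.key_len	= 4,
 *		.protocol	= cpu_to_be16(ETH_P_IP),
 *		.hash		= my_hash,
 *		.constructor	= my_constructor,
 *		.id		= "my_cache",
 *		.parms		= { ... },
 *	};
 *
 *	neigh_table_init(NEIGH_ARP_TABLE, &my_tbl);
 */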
1597 
1598 int neigh_table_clear(int index, struct neigh_table *tbl)
1599 {
1600 	neigh_tables[index] = NULL;
1601 	/* It is not clean... Fix it to unload the IPv6 module safely */
1602 	cancel_delayed_work_sync(&tbl->gc_work);
1603 	del_timer_sync(&tbl->proxy_timer);
1604 	pneigh_queue_purge(&tbl->proxy_queue);
1605 	neigh_ifdown(tbl, NULL);
1606 	if (atomic_read(&tbl->entries))
1607 		pr_crit("neighbour leakage\n");
1608 
1609 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1610 		 neigh_hash_free_rcu);
1611 	tbl->nht = NULL;
1612 
1613 	kfree(tbl->phash_buckets);
1614 	tbl->phash_buckets = NULL;
1615 
1616 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1617 
1618 	free_percpu(tbl->stats);
1619 	tbl->stats = NULL;
1620 
1621 	return 0;
1622 }
1623 EXPORT_SYMBOL(neigh_table_clear);
1624 
1625 static struct neigh_table *neigh_find_table(int family)
1626 {
1627 	struct neigh_table *tbl = NULL;
1628 
1629 	switch (family) {
1630 	case AF_INET:
1631 		tbl = neigh_tables[NEIGH_ARP_TABLE];
1632 		break;
1633 	case AF_INET6:
1634 		tbl = neigh_tables[NEIGH_ND_TABLE];
1635 		break;
1636 	case AF_DECnet:
1637 		tbl = neigh_tables[NEIGH_DN_TABLE];
1638 		break;
1639 	}
1640 
1641 	return tbl;
1642 }
1643 
1644 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1645 			struct netlink_ext_ack *extack)
1646 {
1647 	struct net *net = sock_net(skb->sk);
1648 	struct ndmsg *ndm;
1649 	struct nlattr *dst_attr;
1650 	struct neigh_table *tbl;
1651 	struct neighbour *neigh;
1652 	struct net_device *dev = NULL;
1653 	int err = -EINVAL;
1654 
1655 	ASSERT_RTNL();
1656 	if (nlmsg_len(nlh) < sizeof(*ndm))
1657 		goto out;
1658 
1659 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1660 	if (dst_attr == NULL)
1661 		goto out;
1662 
1663 	ndm = nlmsg_data(nlh);
1664 	if (ndm->ndm_ifindex) {
1665 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1666 		if (dev == NULL) {
1667 			err = -ENODEV;
1668 			goto out;
1669 		}
1670 	}
1671 
1672 	tbl = neigh_find_table(ndm->ndm_family);
1673 	if (tbl == NULL)
1674 		return -EAFNOSUPPORT;
1675 
1676 	if (nla_len(dst_attr) < (int)tbl->key_len)
1677 		goto out;
1678 
1679 	if (ndm->ndm_flags & NTF_PROXY) {
1680 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1681 		goto out;
1682 	}
1683 
1684 	if (dev == NULL)
1685 		goto out;
1686 
1687 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1688 	if (neigh == NULL) {
1689 		err = -ENOENT;
1690 		goto out;
1691 	}
1692 
1693 	err = neigh_update(neigh, NULL, NUD_FAILED,
1694 			   NEIGH_UPDATE_F_OVERRIDE |
1695 			   NEIGH_UPDATE_F_ADMIN,
1696 			   NETLINK_CB(skb).portid);
1697 	write_lock_bh(&tbl->lock);
1698 	neigh_release(neigh);
1699 	neigh_remove_one(neigh, tbl);
1700 	write_unlock_bh(&tbl->lock);
1701 
1702 out:
1703 	return err;
1704 }
1705 
1706 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1707 		     struct netlink_ext_ack *extack)
1708 {
1709 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1710 	struct net *net = sock_net(skb->sk);
1711 	struct ndmsg *ndm;
1712 	struct nlattr *tb[NDA_MAX+1];
1713 	struct neigh_table *tbl;
1714 	struct net_device *dev = NULL;
1715 	struct neighbour *neigh;
1716 	void *dst, *lladdr;
1717 	int err;
1718 
1719 	ASSERT_RTNL();
1720 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1721 	if (err < 0)
1722 		goto out;
1723 
1724 	err = -EINVAL;
1725 	if (tb[NDA_DST] == NULL)
1726 		goto out;
1727 
1728 	ndm = nlmsg_data(nlh);
1729 	if (ndm->ndm_ifindex) {
1730 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1731 		if (dev == NULL) {
1732 			err = -ENODEV;
1733 			goto out;
1734 		}
1735 
1736 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1737 			goto out;
1738 	}
1739 
1740 	tbl = neigh_find_table(ndm->ndm_family);
1741 	if (tbl == NULL)
1742 		return -EAFNOSUPPORT;
1743 
1744 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
1745 		goto out;
1746 	dst = nla_data(tb[NDA_DST]);
1747 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1748 
1749 	if (ndm->ndm_flags & NTF_PROXY) {
1750 		struct pneigh_entry *pn;
1751 
1752 		err = -ENOBUFS;
1753 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
1754 		if (pn) {
1755 			pn->flags = ndm->ndm_flags;
1756 			err = 0;
1757 		}
1758 		goto out;
1759 	}
1760 
1761 	if (dev == NULL)
1762 		goto out;
1763 
1764 	neigh = neigh_lookup(tbl, dst, dev);
1765 	if (neigh == NULL) {
1766 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1767 			err = -ENOENT;
1768 			goto out;
1769 		}
1770 
1771 		neigh = __neigh_lookup_errno(tbl, dst, dev);
1772 		if (IS_ERR(neigh)) {
1773 			err = PTR_ERR(neigh);
1774 			goto out;
1775 		}
1776 	} else {
1777 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
1778 			err = -EEXIST;
1779 			neigh_release(neigh);
1780 			goto out;
1781 		}
1782 
1783 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1784 			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1785 	}
1786 
1787 	if (ndm->ndm_flags & NTF_EXT_LEARNED)
1788 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1789 
1790 	if (ndm->ndm_flags & NTF_USE) {
1791 		neigh_event_send(neigh, NULL);
1792 		err = 0;
1793 	} else
1794 		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1795 				   NETLINK_CB(skb).portid);
1796 	neigh_release(neigh);
1797 
1798 out:
1799 	return err;
1800 }
1801 
1802 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1803 {
1804 	struct nlattr *nest;
1805 
1806 	nest = nla_nest_start(skb, NDTA_PARMS);
1807 	if (nest == NULL)
1808 		return -ENOBUFS;
1809 
1810 	if ((parms->dev &&
1811 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1812 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1813 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1814 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1815 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
1816 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1817 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1818 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1819 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1820 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
1821 			NEIGH_VAR(parms, UCAST_PROBES)) ||
1822 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
1823 			NEIGH_VAR(parms, MCAST_PROBES)) ||
1824 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1825 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
1826 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1827 			  NDTPA_PAD) ||
1828 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1829 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1830 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
1831 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1832 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1833 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1834 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1835 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1836 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1837 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1838 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1839 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1840 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
1841 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1842 		goto nla_put_failure;
1843 	return nla_nest_end(skb, nest);
1844 
1845 nla_put_failure:
1846 	nla_nest_cancel(skb, nest);
1847 	return -EMSGSIZE;
1848 }
1849 
1850 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1851 			      u32 pid, u32 seq, int type, int flags)
1852 {
1853 	struct nlmsghdr *nlh;
1854 	struct ndtmsg *ndtmsg;
1855 
1856 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1857 	if (nlh == NULL)
1858 		return -EMSGSIZE;
1859 
1860 	ndtmsg = nlmsg_data(nlh);
1861 
1862 	read_lock_bh(&tbl->lock);
1863 	ndtmsg->ndtm_family = tbl->family;
1864 	ndtmsg->ndtm_pad1   = 0;
1865 	ndtmsg->ndtm_pad2   = 0;
1866 
1867 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1868 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
1869 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1870 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1871 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1872 		goto nla_put_failure;
1873 	{
1874 		unsigned long now = jiffies;
1875 		unsigned int flush_delta = now - tbl->last_flush;
1876 		unsigned int rand_delta = now - tbl->last_rand;
1877 		struct neigh_hash_table *nht;
1878 		struct ndt_config ndc = {
1879 			.ndtc_key_len		= tbl->key_len,
1880 			.ndtc_entry_size	= tbl->entry_size,
1881 			.ndtc_entries		= atomic_read(&tbl->entries),
1882 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1883 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1884 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1885 		};
1886 
1887 		rcu_read_lock_bh();
1888 		nht = rcu_dereference_bh(tbl->nht);
1889 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1890 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1891 		rcu_read_unlock_bh();
1892 
1893 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1894 			goto nla_put_failure;
1895 	}
1896 
1897 	{
1898 		int cpu;
1899 		struct ndt_stats ndst;
1900 
1901 		memset(&ndst, 0, sizeof(ndst));
1902 
1903 		for_each_possible_cpu(cpu) {
1904 			struct neigh_statistics	*st;
1905 
1906 			st = per_cpu_ptr(tbl->stats, cpu);
1907 			ndst.ndts_allocs		+= st->allocs;
1908 			ndst.ndts_destroys		+= st->destroys;
1909 			ndst.ndts_hash_grows		+= st->hash_grows;
1910 			ndst.ndts_res_failed		+= st->res_failed;
1911 			ndst.ndts_lookups		+= st->lookups;
1912 			ndst.ndts_hits			+= st->hits;
1913 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1914 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1915 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1916 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1917 			ndst.ndts_table_fulls		+= st->table_fulls;
1918 		}
1919 
1920 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1921 				  NDTA_PAD))
1922 			goto nla_put_failure;
1923 	}
1924 
1925 	BUG_ON(tbl->parms.dev);
1926 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1927 		goto nla_put_failure;
1928 
1929 	read_unlock_bh(&tbl->lock);
1930 	nlmsg_end(skb, nlh);
1931 	return 0;
1932 
1933 nla_put_failure:
1934 	read_unlock_bh(&tbl->lock);
1935 	nlmsg_cancel(skb, nlh);
1936 	return -EMSGSIZE;
1937 }
1938 
1939 static int neightbl_fill_param_info(struct sk_buff *skb,
1940 				    struct neigh_table *tbl,
1941 				    struct neigh_parms *parms,
1942 				    u32 pid, u32 seq, int type,
1943 				    unsigned int flags)
1944 {
1945 	struct ndtmsg *ndtmsg;
1946 	struct nlmsghdr *nlh;
1947 
1948 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1949 	if (nlh == NULL)
1950 		return -EMSGSIZE;
1951 
1952 	ndtmsg = nlmsg_data(nlh);
1953 
1954 	read_lock_bh(&tbl->lock);
1955 	ndtmsg->ndtm_family = tbl->family;
1956 	ndtmsg->ndtm_pad1   = 0;
1957 	ndtmsg->ndtm_pad2   = 0;
1958 
1959 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1960 	    neightbl_fill_parms(skb, parms) < 0)
1961 		goto errout;
1962 
1963 	read_unlock_bh(&tbl->lock);
1964 	nlmsg_end(skb, nlh);
1965 	return 0;
1966 errout:
1967 	read_unlock_bh(&tbl->lock);
1968 	nlmsg_cancel(skb, nlh);
1969 	return -EMSGSIZE;
1970 }
1971 
1972 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1973 	[NDTA_NAME]		= { .type = NLA_STRING },
1974 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1975 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1976 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1977 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1978 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1979 };
1980 
1981 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1982 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1983 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1984 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1985 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1986 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1987 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1988 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
1989 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1990 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1991 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1992 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1993 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1994 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1995 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1996 };
1997 
1998 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
1999 			struct netlink_ext_ack *extack)
2000 {
2001 	struct net *net = sock_net(skb->sk);
2002 	struct neigh_table *tbl;
2003 	struct ndtmsg *ndtmsg;
2004 	struct nlattr *tb[NDTA_MAX+1];
2005 	bool found = false;
2006 	int err, tidx;
2007 
2008 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2009 			  nl_neightbl_policy, extack);
2010 	if (err < 0)
2011 		goto errout;
2012 
2013 	if (tb[NDTA_NAME] == NULL) {
2014 		err = -EINVAL;
2015 		goto errout;
2016 	}
2017 
2018 	ndtmsg = nlmsg_data(nlh);
2019 
2020 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2021 		tbl = neigh_tables[tidx];
2022 		if (!tbl)
2023 			continue;
2024 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2025 			continue;
2026 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2027 			found = true;
2028 			break;
2029 		}
2030 	}
2031 
2032 	if (!found)
2033 		return -ENOENT;
2034 
2035 	/*
2036 	 * We acquire tbl->lock to be nice to the periodic timers and
2037 	 * make sure they always see a consistent set of values.
2038 	 */
2039 	write_lock_bh(&tbl->lock);
2040 
2041 	if (tb[NDTA_PARMS]) {
2042 		struct nlattr *tbp[NDTPA_MAX+1];
2043 		struct neigh_parms *p;
2044 		int i, ifindex = 0;
2045 
2046 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2047 				       nl_ntbl_parm_policy, extack);
2048 		if (err < 0)
2049 			goto errout_tbl_lock;
2050 
2051 		if (tbp[NDTPA_IFINDEX])
2052 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2053 
2054 		p = lookup_neigh_parms(tbl, net, ifindex);
2055 		if (p == NULL) {
2056 			err = -ENOENT;
2057 			goto errout_tbl_lock;
2058 		}
2059 
2060 		for (i = 1; i <= NDTPA_MAX; i++) {
2061 			if (tbp[i] == NULL)
2062 				continue;
2063 
2064 			switch (i) {
2065 			case NDTPA_QUEUE_LEN:
2066 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2067 					      nla_get_u32(tbp[i]) *
2068 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2069 				break;
2070 			case NDTPA_QUEUE_LENBYTES:
2071 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2072 					      nla_get_u32(tbp[i]));
2073 				break;
2074 			case NDTPA_PROXY_QLEN:
2075 				NEIGH_VAR_SET(p, PROXY_QLEN,
2076 					      nla_get_u32(tbp[i]));
2077 				break;
2078 			case NDTPA_APP_PROBES:
2079 				NEIGH_VAR_SET(p, APP_PROBES,
2080 					      nla_get_u32(tbp[i]));
2081 				break;
2082 			case NDTPA_UCAST_PROBES:
2083 				NEIGH_VAR_SET(p, UCAST_PROBES,
2084 					      nla_get_u32(tbp[i]));
2085 				break;
2086 			case NDTPA_MCAST_PROBES:
2087 				NEIGH_VAR_SET(p, MCAST_PROBES,
2088 					      nla_get_u32(tbp[i]));
2089 				break;
2090 			case NDTPA_MCAST_REPROBES:
2091 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2092 					      nla_get_u32(tbp[i]));
2093 				break;
2094 			case NDTPA_BASE_REACHABLE_TIME:
2095 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2096 					      nla_get_msecs(tbp[i]));
2097 				/* Update reachable_time as well; otherwise the change
2098 				 * only takes effect the next time neigh_periodic_work
2099 				 * decides to recompute it (which can be multiple minutes).
2100 				 */
2101 				p->reachable_time =
2102 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2103 				break;
2104 			case NDTPA_GC_STALETIME:
2105 				NEIGH_VAR_SET(p, GC_STALETIME,
2106 					      nla_get_msecs(tbp[i]));
2107 				break;
2108 			case NDTPA_DELAY_PROBE_TIME:
2109 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2110 					      nla_get_msecs(tbp[i]));
2111 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2112 				break;
2113 			case NDTPA_RETRANS_TIME:
2114 				NEIGH_VAR_SET(p, RETRANS_TIME,
2115 					      nla_get_msecs(tbp[i]));
2116 				break;
2117 			case NDTPA_ANYCAST_DELAY:
2118 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2119 					      nla_get_msecs(tbp[i]));
2120 				break;
2121 			case NDTPA_PROXY_DELAY:
2122 				NEIGH_VAR_SET(p, PROXY_DELAY,
2123 					      nla_get_msecs(tbp[i]));
2124 				break;
2125 			case NDTPA_LOCKTIME:
2126 				NEIGH_VAR_SET(p, LOCKTIME,
2127 					      nla_get_msecs(tbp[i]));
2128 				break;
2129 			}
2130 		}
2131 	}
2132 
2133 	err = -ENOENT;
2134 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2135 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2136 	    !net_eq(net, &init_net))
2137 		goto errout_tbl_lock;
2138 
2139 	if (tb[NDTA_THRESH1])
2140 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2141 
2142 	if (tb[NDTA_THRESH2])
2143 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2144 
2145 	if (tb[NDTA_THRESH3])
2146 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2147 
2148 	if (tb[NDTA_GC_INTERVAL])
2149 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2150 
2151 	err = 0;
2152 
2153 errout_tbl_lock:
2154 	write_unlock_bh(&tbl->lock);
2155 errout:
2156 	return err;
2157 }
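
/* Editorial sketch (assumption: the iproute2 front end): userspace
 * reaches neightbl_set() with RTM_SETNEIGHTBL messages, e.g.
 *
 *	ip ntable change name arp_cache thresh1 256
 *	ip ntable change name arp_cache dev eth0 queue 32
 *
 * The first form touches the table-wide gc_thresh1 and is only honoured
 * in init_net (the net_eq() check above); the second carries an
 * NDTPA_IFINDEX inside NDTA_PARMS and so updates the per-device
 * neigh_parms found by lookup_neigh_parms().
 */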
2158 
2159 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2160 {
2161 	struct net *net = sock_net(skb->sk);
2162 	int family, tidx, nidx = 0;
2163 	int tbl_skip = cb->args[0];
2164 	int neigh_skip = cb->args[1];
2165 	struct neigh_table *tbl;
2166 
2167 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2168 
2169 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2170 		struct neigh_parms *p;
2171 
2172 		tbl = neigh_tables[tidx];
2173 		if (!tbl)
2174 			continue;
2175 
2176 		if (tidx < tbl_skip || (family && tbl->family != family))
2177 			continue;
2178 
2179 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2180 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2181 				       NLM_F_MULTI) < 0)
2182 			break;
2183 
2184 		nidx = 0;
2185 		p = list_next_entry(&tbl->parms, list);
2186 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2187 			if (!net_eq(neigh_parms_net(p), net))
2188 				continue;
2189 
2190 			if (nidx < neigh_skip)
2191 				goto next;
2192 
2193 			if (neightbl_fill_param_info(skb, tbl, p,
2194 						     NETLINK_CB(cb->skb).portid,
2195 						     cb->nlh->nlmsg_seq,
2196 						     RTM_NEWNEIGHTBL,
2197 						     NLM_F_MULTI) < 0)
2198 				goto out;
2199 		next:
2200 			nidx++;
2201 		}
2202 
2203 		neigh_skip = 0;
2204 	}
2205 out:
2206 	cb->args[0] = tidx;
2207 	cb->args[1] = nidx;
2208 
2209 	return skb->len;
2210 }
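
/* Editorial note: cb->args[] is the cursor netlink keeps for a dump
 * across recvmsg() calls.  args[0] records the table index and args[1]
 * the parms index, so a dump that overruns one skb resumes at the exact
 * table/parms pair whose fill function last returned -EMSGSIZE.
 */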
2211 
2212 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2213 			   u32 pid, u32 seq, int type, unsigned int flags)
2214 {
2215 	unsigned long now = jiffies;
2216 	struct nda_cacheinfo ci;
2217 	struct nlmsghdr *nlh;
2218 	struct ndmsg *ndm;
2219 
2220 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2221 	if (nlh == NULL)
2222 		return -EMSGSIZE;
2223 
2224 	ndm = nlmsg_data(nlh);
2225 	ndm->ndm_family	 = neigh->ops->family;
2226 	ndm->ndm_pad1    = 0;
2227 	ndm->ndm_pad2    = 0;
2228 	ndm->ndm_flags	 = neigh->flags;
2229 	ndm->ndm_type	 = neigh->type;
2230 	ndm->ndm_ifindex = neigh->dev->ifindex;
2231 
2232 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2233 		goto nla_put_failure;
2234 
2235 	read_lock_bh(&neigh->lock);
2236 	ndm->ndm_state	 = neigh->nud_state;
2237 	if (neigh->nud_state & NUD_VALID) {
2238 		char haddr[MAX_ADDR_LEN];
2239 
2240 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2241 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2242 			read_unlock_bh(&neigh->lock);
2243 			goto nla_put_failure;
2244 		}
2245 	}
2246 
2247 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2248 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2249 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2250 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2251 	read_unlock_bh(&neigh->lock);
2252 
2253 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2254 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2255 		goto nla_put_failure;
2256 
2257 	nlmsg_end(skb, nlh);
2258 	return 0;
2259 
2260 nla_put_failure:
2261 	nlmsg_cancel(skb, nlh);
2262 	return -EMSGSIZE;
2263 }
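
/* Editorial sketch: one entry serializes roughly as
 *
 *	struct ndmsg	family / ifindex / state / flags / type
 *	NDA_DST		primary_key (key_len bytes of protocol address)
 *	NDA_LLADDR	link-layer address, only while NUD_VALID
 *	NDA_PROBES	u32
 *	NDA_CACHEINFO	struct nda_cacheinfo (clock_t age deltas)
 *
 * neigh_ha_snapshot() copies the hardware address into a local buffer
 * so the attribute is built from a consistent snapshot even if the
 * address is being updated concurrently.
 */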
2264 
2265 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2266 			    u32 pid, u32 seq, int type, unsigned int flags,
2267 			    struct neigh_table *tbl)
2268 {
2269 	struct nlmsghdr *nlh;
2270 	struct ndmsg *ndm;
2271 
2272 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2273 	if (nlh == NULL)
2274 		return -EMSGSIZE;
2275 
2276 	ndm = nlmsg_data(nlh);
2277 	ndm->ndm_family	 = tbl->family;
2278 	ndm->ndm_pad1    = 0;
2279 	ndm->ndm_pad2    = 0;
2280 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2281 	ndm->ndm_type	 = RTN_UNICAST;
2282 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2283 	ndm->ndm_state	 = NUD_NONE;
2284 
2285 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2286 		goto nla_put_failure;
2287 
2288 	nlmsg_end(skb, nlh);
2289 	return 0;
2290 
2291 nla_put_failure:
2292 	nlmsg_cancel(skb, nlh);
2293 	return -EMSGSIZE;
2294 }
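
/* Editorial note: proxy entries have no reachability state machine of
 * their own, hence the fixed NUD_NONE, and NTF_PROXY lets userspace
 * tell them apart from resolved neighbours in the same RTM_NEWNEIGH
 * dump stream.
 */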
2295 
2296 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2297 {
2298 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2299 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2300 }
2301 
2302 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2303 {
2304 	struct net_device *master;
2305 
2306 	if (!master_idx)
2307 		return false;
2308 
2309 	master = netdev_master_upper_dev_get(dev);
2310 	if (!master || master->ifindex != master_idx)
2311 		return true;
2312 
2313 	return false;
2314 }
2315 
2316 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2317 {
2318 	if (filter_idx && dev->ifindex != filter_idx)
2319 		return true;
2320 
2321 	return false;
2322 }
2323 
2324 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2325 			    struct netlink_callback *cb)
2326 {
2327 	struct net *net = sock_net(skb->sk);
2328 	const struct nlmsghdr *nlh = cb->nlh;
2329 	struct nlattr *tb[NDA_MAX + 1];
2330 	struct neighbour *n;
2331 	int rc, h, s_h = cb->args[1];
2332 	int idx, s_idx = idx = cb->args[2];
2333 	struct neigh_hash_table *nht;
2334 	int filter_master_idx = 0, filter_idx = 0;
2335 	unsigned int flags = NLM_F_MULTI;
2336 	int err;
2337 
2338 	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
2339 	if (!err) {
2340 		if (tb[NDA_IFINDEX]) {
2341 			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
2342 				return -EINVAL;
2343 			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
2344 		}
2345 		if (tb[NDA_MASTER]) {
2346 			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
2347 				return -EINVAL;
2348 			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
2349 		}
2350 		if (filter_idx || filter_master_idx)
2351 			flags |= NLM_F_DUMP_FILTERED;
2352 	}
2353 
2354 	rcu_read_lock_bh();
2355 	nht = rcu_dereference_bh(tbl->nht);
2356 
2357 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2358 		if (h > s_h)
2359 			s_idx = 0;
2360 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2361 		     n != NULL;
2362 		     n = rcu_dereference_bh(n->next)) {
2363 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2364 				goto next;
2365 			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
2366 			    neigh_master_filtered(n->dev, filter_master_idx))
2367 				goto next;
2368 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2369 					    cb->nlh->nlmsg_seq,
2370 					    RTM_NEWNEIGH,
2371 					    flags) < 0) {
2372 				rc = -1;
2373 				goto out;
2374 			}
2375 next:
2376 			idx++;
2377 		}
2378 	}
2379 	rc = skb->len;
2380 out:
2381 	rcu_read_unlock_bh();
2382 	cb->args[1] = h;
2383 	cb->args[2] = idx;
2384 	return rc;
2385 }
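
/* Editorial sketch (assumption: iproute2 front end): the NDA_IFINDEX
 * and NDA_MASTER attributes parsed above back filtered dumps such as
 *
 *	ip neigh show dev eth0
 *	ip neigh show master br0
 *
 * and NLM_F_DUMP_FILTERED signals that the kernel already narrowed the
 * result set, so userspace need not filter again.
 */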
2386 
2387 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2388 			     struct netlink_callback *cb)
2389 {
2390 	struct pneigh_entry *n;
2391 	struct net *net = sock_net(skb->sk);
2392 	int rc, h, s_h = cb->args[3];
2393 	int idx, s_idx = idx = cb->args[4];
2394 
2395 	read_lock_bh(&tbl->lock);
2396 
2397 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2398 		if (h > s_h)
2399 			s_idx = 0;
2400 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2401 			if (idx < s_idx || pneigh_net(n) != net)
2402 				goto next;
2403 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2404 					    cb->nlh->nlmsg_seq,
2405 					    RTM_NEWNEIGH,
2406 					    NLM_F_MULTI, tbl) < 0) {
2407 				read_unlock_bh(&tbl->lock);
2408 				rc = -1;
2409 				goto out;
2410 			}
2411 		next:
2412 			idx++;
2413 		}
2414 	}
2415 
2416 	read_unlock_bh(&tbl->lock);
2417 	rc = skb->len;
2418 out:
2419 	cb->args[3] = h;
2420 	cb->args[4] = idx;
2421 	return rc;
2422 
2423 }
2424 
2425 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2426 {
2427 	struct neigh_table *tbl;
2428 	int t, family, s_t;
2429 	int proxy = 0;
2430 	int err;
2431 
2432 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2433 
2434 	/* check whether a full ndmsg structure is present; the family
2435 	 * member sits at the same offset in both ndmsg and rtgenmsg
2436 	 */
2437 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2438 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2439 		proxy = 1;
2440 
2441 	s_t = cb->args[0];
2442 
2443 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2444 		tbl = neigh_tables[t];
2445 
2446 		if (!tbl)
2447 			continue;
2448 		if (t < s_t || (family && tbl->family != family))
2449 			continue;
2450 		if (t > s_t)
2451 			memset(&cb->args[1], 0, sizeof(cb->args) -
2452 						sizeof(cb->args[0]));
2453 		if (proxy)
2454 			err = pneigh_dump_table(tbl, skb, cb);
2455 		else
2456 			err = neigh_dump_table(tbl, skb, cb);
2457 		if (err < 0)
2458 			break;
2459 	}
2460 
2461 	cb->args[0] = t;
2462 	return skb->len;
2463 }
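
/* Editorial note: a dump request whose ndmsg carries NTF_PROXY (e.g.
 * "ip neigh show proxy", assuming the iproute2 front end) walks the
 * pneigh hash instead of the main table; the two dump routines share
 * the cb->args cursor but use disjoint slots (args[1]/args[2] versus
 * args[3]/args[4]).
 */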
2464 
2465 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2466 {
2467 	int chain;
2468 	struct neigh_hash_table *nht;
2469 
2470 	rcu_read_lock_bh();
2471 	nht = rcu_dereference_bh(tbl->nht);
2472 
2473 	read_lock(&tbl->lock); /* avoid resizes */
2474 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2475 		struct neighbour *n;
2476 
2477 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2478 		     n != NULL;
2479 		     n = rcu_dereference_bh(n->next))
2480 			cb(n, cookie);
2481 	}
2482 	read_unlock(&tbl->lock);
2483 	rcu_read_unlock_bh();
2484 }
2485 EXPORT_SYMBOL(neigh_for_each);
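
/*
 * Editorial sketch of a neigh_for_each() user (hypothetical helper, not
 * part of this file).  The callback runs with tbl->lock read-held and
 * BH disabled, so it must be short and must not sleep; the unlocked
 * peek at nud_state is acceptable for an approximate statistic.
 */
static void count_reachable(struct neighbour *n, void *cookie)
{
	int *count = cookie;

	if (n->nud_state & NUD_REACHABLE)
		(*count)++;
}

static int tbl_count_reachable(struct neigh_table *tbl)
{
	int count = 0;

	neigh_for_each(tbl, count_reachable, &count);
	return count;
}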
2486 
2487 /* The tbl->lock must be held as a writer and BH disabled. */
2488 void __neigh_for_each_release(struct neigh_table *tbl,
2489 			      int (*cb)(struct neighbour *))
2490 {
2491 	int chain;
2492 	struct neigh_hash_table *nht;
2493 
2494 	nht = rcu_dereference_protected(tbl->nht,
2495 					lockdep_is_held(&tbl->lock));
2496 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2497 		struct neighbour *n;
2498 		struct neighbour __rcu **np;
2499 
2500 		np = &nht->hash_buckets[chain];
2501 		while ((n = rcu_dereference_protected(*np,
2502 					lockdep_is_held(&tbl->lock))) != NULL) {
2503 			int release;
2504 
2505 			write_lock(&n->lock);
2506 			release = cb(n);
2507 			if (release) {
2508 				rcu_assign_pointer(*np,
2509 					rcu_dereference_protected(n->next,
2510 						lockdep_is_held(&tbl->lock)));
2511 				n->dead = 1;
2512 			} else
2513 				np = &n->next;
2514 			write_unlock(&n->lock);
2515 			if (release)
2516 				neigh_cleanup_and_release(n);
2517 		}
2518 	}
2519 }
2520 EXPORT_SYMBOL(__neigh_for_each_release);
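
/*
 * Editorial sketch (hypothetical names): a nonzero return from the
 * callback asks the walker to unlink the entry, mark it dead and hand
 * it to neigh_cleanup_and_release().  The predicate below is modeled
 * loosely on forced-GC criteria; the caller must honour the locking
 * rule stated above.
 */
static int release_unreferenced(struct neighbour *n)
{
	/* drop entries nobody else references, sparing permanent ones */
	return refcount_read(&n->refcnt) == 1 &&
	       !(n->nud_state & NUD_PERMANENT);
}

static void tbl_drop_unreferenced(struct neigh_table *tbl)
{
	write_lock_bh(&tbl->lock);	/* writer + BH off, as required */
	__neigh_for_each_release(tbl, release_unreferenced);
	write_unlock_bh(&tbl->lock);
}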
2521 
2522 int neigh_xmit(int index, struct net_device *dev,
2523 	       const void *addr, struct sk_buff *skb)
2524 {
2525 	int err = -EAFNOSUPPORT;
2526 	if (likely(index < NEIGH_NR_TABLES)) {
2527 		struct neigh_table *tbl;
2528 		struct neighbour *neigh;
2529 
2530 		tbl = neigh_tables[index];
2531 		if (!tbl)
2532 			goto out;
2533 		rcu_read_lock_bh();
2534 		neigh = __neigh_lookup_noref(tbl, addr, dev);
2535 		if (!neigh)
2536 			neigh = __neigh_create(tbl, addr, dev, false);
2537 		err = PTR_ERR(neigh);
2538 		if (IS_ERR(neigh)) {
2539 			rcu_read_unlock_bh();
2540 			goto out_kfree_skb;
2541 		}
2542 		err = neigh->output(neigh, skb);
2543 		rcu_read_unlock_bh();
2544 	} else if (index == NEIGH_LINK_TABLE) {
2546 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2547 				      addr, NULL, skb->len);
2548 		if (err < 0)
2549 			goto out_kfree_skb;
2550 		err = dev_queue_xmit(skb);
2551 	}
2552 out:
2553 	return err;
2554 out_kfree_skb:
2555 	kfree_skb(skb);
2556 	goto out;
2557 }
2558 EXPORT_SYMBOL(neigh_xmit);
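
/* Editorial sketch: neigh_xmit() lets protocols without their own
 * resolution state (MPLS is the in-tree example) transmit through an
 * existing table, e.g. (out_dev and ipv4_nexthop are hypothetical
 * caller variables):
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &ipv4_nexthop, skb);
 *
 * With NEIGH_LINK_TABLE resolution is bypassed entirely: addr is then
 * the ready-made link-layer destination handed to dev_hard_header().
 */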
2559 
2560 #ifdef CONFIG_PROC_FS
2561 
2562 static struct neighbour *neigh_get_first(struct seq_file *seq)
2563 {
2564 	struct neigh_seq_state *state = seq->private;
2565 	struct net *net = seq_file_net(seq);
2566 	struct neigh_hash_table *nht = state->nht;
2567 	struct neighbour *n = NULL;
2568 	int bucket = state->bucket;
2569 
2570 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2571 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2572 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2573 
2574 		while (n) {
2575 			if (!net_eq(dev_net(n->dev), net))
2576 				goto next;
2577 			if (state->neigh_sub_iter) {
2578 				loff_t fakep = 0;
2579 				void *v;
2580 
2581 				v = state->neigh_sub_iter(state, n, &fakep);
2582 				if (!v)
2583 					goto next;
2584 			}
2585 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2586 				break;
2587 			if (n->nud_state & ~NUD_NOARP)
2588 				break;
2589 next:
2590 			n = rcu_dereference_bh(n->next);
2591 		}
2592 
2593 		if (n)
2594 			break;
2595 	}
2596 	state->bucket = bucket;
2597 
2598 	return n;
2599 }
2600 
2601 static struct neighbour *neigh_get_next(struct seq_file *seq,
2602 					struct neighbour *n,
2603 					loff_t *pos)
2604 {
2605 	struct neigh_seq_state *state = seq->private;
2606 	struct net *net = seq_file_net(seq);
2607 	struct neigh_hash_table *nht = state->nht;
2608 
2609 	if (state->neigh_sub_iter) {
2610 		void *v = state->neigh_sub_iter(state, n, pos);
2611 		if (v)
2612 			return n;
2613 	}
2614 	n = rcu_dereference_bh(n->next);
2615 
2616 	while (1) {
2617 		while (n) {
2618 			if (!net_eq(dev_net(n->dev), net))
2619 				goto next;
2620 			if (state->neigh_sub_iter) {
2621 				void *v = state->neigh_sub_iter(state, n, pos);
2622 				if (v)
2623 					return n;
2624 				goto next;
2625 			}
2626 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2627 				break;
2628 
2629 			if (n->nud_state & ~NUD_NOARP)
2630 				break;
2631 next:
2632 			n = rcu_dereference_bh(n->next);
2633 		}
2634 
2635 		if (n)
2636 			break;
2637 
2638 		if (++state->bucket >= (1 << nht->hash_shift))
2639 			break;
2640 
2641 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2642 	}
2643 
2644 	if (n && pos)
2645 		--(*pos);
2646 	return n;
2647 }
2648 
2649 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2650 {
2651 	struct neighbour *n = neigh_get_first(seq);
2652 
2653 	if (n) {
2654 		--(*pos);
2655 		while (*pos) {
2656 			n = neigh_get_next(seq, n, pos);
2657 			if (!n)
2658 				break;
2659 		}
2660 	}
2661 	return *pos ? NULL : n;
2662 }
2663 
2664 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2665 {
2666 	struct neigh_seq_state *state = seq->private;
2667 	struct net *net = seq_file_net(seq);
2668 	struct neigh_table *tbl = state->tbl;
2669 	struct pneigh_entry *pn = NULL;
2670 	int bucket = state->bucket;
2671 
2672 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2673 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2674 		pn = tbl->phash_buckets[bucket];
2675 		while (pn && !net_eq(pneigh_net(pn), net))
2676 			pn = pn->next;
2677 		if (pn)
2678 			break;
2679 	}
2680 	state->bucket = bucket;
2681 
2682 	return pn;
2683 }
2684 
2685 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2686 					    struct pneigh_entry *pn,
2687 					    loff_t *pos)
2688 {
2689 	struct neigh_seq_state *state = seq->private;
2690 	struct net *net = seq_file_net(seq);
2691 	struct neigh_table *tbl = state->tbl;
2692 
2693 	do {
2694 		pn = pn->next;
2695 	} while (pn && !net_eq(pneigh_net(pn), net));
2696 
2697 	while (!pn) {
2698 		if (++state->bucket > PNEIGH_HASHMASK)
2699 			break;
2700 		pn = tbl->phash_buckets[state->bucket];
2701 		while (pn && !net_eq(pneigh_net(pn), net))
2702 			pn = pn->next;
2703 		if (pn)
2704 			break;
2705 	}
2706 
2707 	if (pn && pos)
2708 		--(*pos);
2709 
2710 	return pn;
2711 }
2712 
2713 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2714 {
2715 	struct pneigh_entry *pn = pneigh_get_first(seq);
2716 
2717 	if (pn) {
2718 		--(*pos);
2719 		while (*pos) {
2720 			pn = pneigh_get_next(seq, pn, pos);
2721 			if (!pn)
2722 				break;
2723 		}
2724 	}
2725 	return *pos ? NULL : pn;
2726 }
2727 
2728 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2729 {
2730 	struct neigh_seq_state *state = seq->private;
2731 	void *rc;
2732 	loff_t idxpos = *pos;
2733 
2734 	rc = neigh_get_idx(seq, &idxpos);
2735 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2736 		rc = pneigh_get_idx(seq, &idxpos);
2737 
2738 	return rc;
2739 }
2740 
2741 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2742 	__acquires(rcu_bh)
2743 {
2744 	struct neigh_seq_state *state = seq->private;
2745 
2746 	state->tbl = tbl;
2747 	state->bucket = 0;
2748 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2749 
2750 	rcu_read_lock_bh();
2751 	state->nht = rcu_dereference_bh(tbl->nht);
2752 
2753 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2754 }
2755 EXPORT_SYMBOL(neigh_seq_start);
2756 
2757 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2758 {
2759 	struct neigh_seq_state *state;
2760 	void *rc;
2761 
2762 	if (v == SEQ_START_TOKEN) {
2763 		rc = neigh_get_first(seq);
2764 		goto out;
2765 	}
2766 
2767 	state = seq->private;
2768 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2769 		rc = neigh_get_next(seq, v, NULL);
2770 		if (rc)
2771 			goto out;
2772 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2773 			rc = pneigh_get_first(seq);
2774 	} else {
2775 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2776 		rc = pneigh_get_next(seq, v, NULL);
2777 	}
2778 out:
2779 	++(*pos);
2780 	return rc;
2781 }
2782 EXPORT_SYMBOL(neigh_seq_next);
2783 
2784 void neigh_seq_stop(struct seq_file *seq, void *v)
2785 	__releases(rcu_bh)
2786 {
2787 	rcu_read_unlock_bh();
2788 }
2789 EXPORT_SYMBOL(neigh_seq_stop);
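
/*
 * Editorial sketch: how a protocol wires these helpers into its own
 * seq_file, modeled loosely on net/ipv4/arp.c (the foo_* names and the
 * show routine are hypothetical; arp_tbl is the real IPv4 table):
 */
static int foo_seq_show(struct seq_file *seq, void *v);	/* protocol printer */

static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* skip NUD_NOARP entries, as the ARP code does */
	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}

static const struct seq_operations foo_seq_ops = {
	.start	= foo_seq_start,
	.next	= neigh_seq_next,
	.stop	= neigh_seq_stop,
	.show	= foo_seq_show,
};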
2790 
2791 /* statistics via seq_file */
2792 
2793 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2794 {
2795 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2796 	int cpu;
2797 
2798 	if (*pos == 0)
2799 		return SEQ_START_TOKEN;
2800 
2801 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2802 		if (!cpu_possible(cpu))
2803 			continue;
2804 		*pos = cpu+1;
2805 		return per_cpu_ptr(tbl->stats, cpu);
2806 	}
2807 	return NULL;
2808 }
2809 
2810 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2811 {
2812 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2813 	int cpu;
2814 
2815 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2816 		if (!cpu_possible(cpu))
2817 			continue;
2818 		*pos = cpu+1;
2819 		return per_cpu_ptr(tbl->stats, cpu);
2820 	}
2821 	return NULL;
2822 }
2823 
2824 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2825 {
2826 
2827 }
2828 
2829 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2830 {
2831 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2832 	struct neigh_statistics *st = v;
2833 
2834 	if (v == SEQ_START_TOKEN) {
2835 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
2836 		return 0;
2837 	}
2838 
2839 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2840 			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
2841 		   atomic_read(&tbl->entries),
2842 
2843 		   st->allocs,
2844 		   st->destroys,
2845 		   st->hash_grows,
2846 
2847 		   st->lookups,
2848 		   st->hits,
2849 
2850 		   st->res_failed,
2851 
2852 		   st->rcv_probes_mcast,
2853 		   st->rcv_probes_ucast,
2854 
2855 		   st->periodic_gc_runs,
2856 		   st->forced_gc_runs,
2857 		   st->unres_discards,
2858 		   st->table_fulls
2859 		   );
2860 
2861 	return 0;
2862 }
2863 
2864 static const struct seq_operations neigh_stat_seq_ops = {
2865 	.start	= neigh_stat_seq_start,
2866 	.next	= neigh_stat_seq_next,
2867 	.stop	= neigh_stat_seq_stop,
2868 	.show	= neigh_stat_seq_show,
2869 };
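
/* Editorial note: neigh_table_init() registers these ops under
 * /proc/net/stat/<tbl->id> (e.g. /proc/net/stat/arp_cache); the file
 * shows one row of hex counters per possible CPU beneath the header
 * printed for SEQ_START_TOKEN above.
 */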
2870 #endif /* CONFIG_PROC_FS */
2871 
2872 static inline size_t neigh_nlmsg_size(void)
2873 {
2874 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2875 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2876 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2877 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2878 	       + nla_total_size(4); /* NDA_PROBES */
2879 }
2880 
2881 static void __neigh_notify(struct neighbour *n, int type, int flags,
2882 			   u32 pid)
2883 {
2884 	struct net *net = dev_net(n->dev);
2885 	struct sk_buff *skb;
2886 	int err = -ENOBUFS;
2887 
2888 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2889 	if (skb == NULL)
2890 		goto errout;
2891 
2892 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
2893 	if (err < 0) {
2894 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2895 		WARN_ON(err == -EMSGSIZE);
2896 		kfree_skb(skb);
2897 		goto errout;
2898 	}
2899 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2900 	return;
2901 errout:
2902 	if (err < 0)
2903 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2904 }
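
/* Editorial note: listeners receive these messages by joining the
 * RTNLGRP_NEIGH multicast group on a NETLINK_ROUTE socket ("ip monitor
 * neigh" in iproute2, assuming that front end); GFP_ATOMIC is needed
 * because notifications can originate from timer/softirq context.
 */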
2905 
2906 void neigh_app_ns(struct neighbour *n)
2907 {
2908 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
2909 }
2910 EXPORT_SYMBOL(neigh_app_ns);
2911 
2912 #ifdef CONFIG_SYSCTL
2913 static int zero;
2914 static int int_max = INT_MAX;
2915 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2916 
2917 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2918 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2919 {
2920 	int size, ret;
2921 	struct ctl_table tmp = *ctl;
2922 
2923 	tmp.extra1 = &zero;
2924 	tmp.extra2 = &unres_qlen_max;
2925 	tmp.data = &size;
2926 
2927 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2928 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2929 
2930 	if (write && !ret)
2931 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2932 	return ret;
2933 }
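
/* Editorial note: the legacy "unres_qlen" knob counts packets while the
 * kernel accounts in bytes, so this handler round-trips through
 * SKB_TRUESIZE(ETH_FRAME_LEN).  Rough worked example (the truesize is
 * arch- and config-dependent): with ETH_FRAME_LEN = 1514 plus a few
 * hundred bytes of skb overhead, one "packet" is about 2 KB, so writing
 * unres_qlen=101 stores roughly 200 KB in QUEUE_LEN_BYTES and reads
 * back as 101 because the division uses the identical constant.
 */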
2934 
2935 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2936 						   int family)
2937 {
2938 	switch (family) {
2939 	case AF_INET:
2940 		return __in_dev_arp_parms_get_rcu(dev);
2941 	case AF_INET6:
2942 		return __in6_dev_nd_parms_get_rcu(dev);
2943 	}
2944 	return NULL;
2945 }
2946 
2947 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2948 				  int index)
2949 {
2950 	struct net_device *dev;
2951 	int family = neigh_parms_family(p);
2952 
2953 	rcu_read_lock();
2954 	for_each_netdev_rcu(net, dev) {
2955 		struct neigh_parms *dst_p =
2956 				neigh_get_dev_parms_rcu(dev, family);
2957 
2958 		if (dst_p && !test_bit(index, dst_p->data_state))
2959 			dst_p->data[index] = p->data[index];
2960 	}
2961 	rcu_read_unlock();
2962 }
2963 
2964 static void neigh_proc_update(struct ctl_table *ctl, int write)
2965 {
2966 	struct net_device *dev = ctl->extra1;
2967 	struct neigh_parms *p = ctl->extra2;
2968 	struct net *net = neigh_parms_net(p);
2969 	int index = (int *) ctl->data - p->data;
2970 
2971 	if (!write)
2972 		return;
2973 
2974 	set_bit(index, p->data_state);
2975 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
2976 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2977 	if (!dev) /* NULL dev means this is default value */
2978 		neigh_copy_dflt_parms(net, p, index);
2979 }
2980 
2981 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2982 					   void __user *buffer,
2983 					   size_t *lenp, loff_t *ppos)
2984 {
2985 	struct ctl_table tmp = *ctl;
2986 	int ret;
2987 
2988 	tmp.extra1 = &zero;
2989 	tmp.extra2 = &int_max;
2990 
2991 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2992 	neigh_proc_update(ctl, write);
2993 	return ret;
2994 }
2995 
2996 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2997 			void __user *buffer, size_t *lenp, loff_t *ppos)
2998 {
2999 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3000 
3001 	neigh_proc_update(ctl, write);
3002 	return ret;
3003 }
3004 EXPORT_SYMBOL(neigh_proc_dointvec);
3005 
3006 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3007 				void __user *buffer,
3008 				size_t *lenp, loff_t *ppos)
3009 {
3010 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3011 
3012 	neigh_proc_update(ctl, write);
3013 	return ret;
3014 }
3015 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3016 
3017 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3018 					      void __user *buffer,
3019 					      size_t *lenp, loff_t *ppos)
3020 {
3021 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3022 
3023 	neigh_proc_update(ctl, write);
3024 	return ret;
3025 }
3026 
3027 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3028 				   void __user *buffer,
3029 				   size_t *lenp, loff_t *ppos)
3030 {
3031 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3032 
3033 	neigh_proc_update(ctl, write);
3034 	return ret;
3035 }
3036 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3037 
3038 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3039 					  void __user *buffer,
3040 					  size_t *lenp, loff_t *ppos)
3041 {
3042 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3043 
3044 	neigh_proc_update(ctl, write);
3045 	return ret;
3046 }
3047 
3048 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3049 					  void __user *buffer,
3050 					  size_t *lenp, loff_t *ppos)
3051 {
3052 	struct neigh_parms *p = ctl->extra2;
3053 	int ret;
3054 
3055 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3056 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3057 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3058 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3059 	else
3060 		ret = -1;
3061 
3062 	if (write && ret == 0) {
3063 		/* update reachable_time as well, otherwise, the change will
3064 		 * only be effective after the next time neigh_periodic_work
3065 		 * decides to recompute it
3066 		 */
3067 		p->reachable_time =
3068 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3069 	}
3070 	return ret;
3071 }
3072 
3073 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3074 	(&((struct neigh_parms *) 0)->data[index])
3075 
3076 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3077 	[NEIGH_VAR_ ## attr] = { \
3078 		.procname	= name, \
3079 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3080 		.maxlen		= sizeof(int), \
3081 		.mode		= mval, \
3082 		.proc_handler	= proc, \
3083 	}
3084 
3085 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3086 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3087 
3088 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3089 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3090 
3091 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3092 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3093 
3094 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3095 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3096 
3097 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3098 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3099 
3100 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3101 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
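
/* Editorial sketch: fully expanded,
 *
 *	NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit")
 *
 * becomes
 *
 *	[NEIGH_VAR_MCAST_PROBES] = {
 *		.procname	= "mcast_solicit",
 *		.data		= &((struct neigh_parms *)0)->data[NEIGH_VAR_MCAST_PROBES],
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= neigh_proc_dointvec_zero_intmax,
 *	}
 *
 * i.e. .data initially holds only the field offset within struct
 * neigh_parms; neigh_sysctl_register() below rebases it onto a real
 * parms block with the "data += (long) p" fixup.
 */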
3102 
3103 static struct neigh_sysctl_table {
3104 	struct ctl_table_header *sysctl_header;
3105 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3106 } neigh_sysctl_template __read_mostly = {
3107 	.neigh_vars = {
3108 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3109 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3110 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3111 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3112 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3113 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3114 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3115 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3116 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3117 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3118 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3119 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3120 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3121 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3122 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3123 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3124 		[NEIGH_VAR_GC_INTERVAL] = {
3125 			.procname	= "gc_interval",
3126 			.maxlen		= sizeof(int),
3127 			.mode		= 0644,
3128 			.proc_handler	= proc_dointvec_jiffies,
3129 		},
3130 		[NEIGH_VAR_GC_THRESH1] = {
3131 			.procname	= "gc_thresh1",
3132 			.maxlen		= sizeof(int),
3133 			.mode		= 0644,
3134 			.extra1 	= &zero,
3135 			.extra2		= &int_max,
3136 			.proc_handler	= proc_dointvec_minmax,
3137 		},
3138 		[NEIGH_VAR_GC_THRESH2] = {
3139 			.procname	= "gc_thresh2",
3140 			.maxlen		= sizeof(int),
3141 			.mode		= 0644,
3142 			.extra1 	= &zero,
3143 			.extra2		= &int_max,
3144 			.proc_handler	= proc_dointvec_minmax,
3145 		},
3146 		[NEIGH_VAR_GC_THRESH3] = {
3147 			.procname	= "gc_thresh3",
3148 			.maxlen		= sizeof(int),
3149 			.mode		= 0644,
3150 			.extra1 	= &zero,
3151 			.extra2		= &int_max,
3152 			.proc_handler	= proc_dointvec_minmax,
3153 		},
3154 		{},
3155 	},
3156 };
3157 
3158 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3159 			  proc_handler *handler)
3160 {
3161 	int i;
3162 	struct neigh_sysctl_table *t;
3163 	const char *dev_name_source;
3164 	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3165 	char *p_name;
3166 
3167 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3168 	if (!t)
3169 		goto err;
3170 
3171 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3172 		t->neigh_vars[i].data += (long) p;
3173 		t->neigh_vars[i].extra1 = dev;
3174 		t->neigh_vars[i].extra2 = p;
3175 	}
3176 
3177 	if (dev) {
3178 		dev_name_source = dev->name;
3179 		/* Terminate the table early */
3180 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3181 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3182 	} else {
3183 		struct neigh_table *tbl = p->tbl;
3184 		dev_name_source = "default";
3185 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3186 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3187 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3188 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3189 	}
3190 
3191 	if (handler) {
3192 		/* RetransTime */
3193 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3194 		/* ReachableTime */
3195 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3196 		/* RetransTime (in milliseconds)*/
3197 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3198 		/* ReachableTime (in milliseconds) */
3199 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3200 	} else {
3201 		/* Those handlers will update p->reachable_time after
3202 		/* These handlers update p->reachable_time after
3203 		 * base_reachable_time(_ms) is set, so the new interval applies
3204 		 * from the next neighbour update instead of waiting for
3205 		 * neigh_periodic_work to recompute it (which can take multiple
3206 		 * minutes).  Any handler that replaces them should do the same.
3207 		/* ReachableTime */
3208 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3209 			neigh_proc_base_reachable_time;
3210 		/* ReachableTime (in milliseconds) */
3211 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3212 			neigh_proc_base_reachable_time;
3213 	}
3214 
3215 	/* Don't export sysctls to unprivileged users */
3216 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
3217 		t->neigh_vars[0].procname = NULL;
3218 
3219 	switch (neigh_parms_family(p)) {
3220 	case AF_INET:
3221 	      p_name = "ipv4";
3222 	      break;
3223 	case AF_INET6:
3224 	      p_name = "ipv6";
3225 	      break;
3226 	default:
3227 	      BUG();
3228 	}
3229 
3230 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3231 		p_name, dev_name_source);
3232 	t->sysctl_header =
3233 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3234 	if (!t->sysctl_header)
3235 		goto free;
3236 
3237 	p->sysctl_table = t;
3238 	return 0;
3239 
3240 free:
3241 	kfree(t);
3242 err:
3243 	return -ENOBUFS;
3244 }
3245 EXPORT_SYMBOL(neigh_sysctl_register);
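
/* Editorial sketch (modeled on the IPv4 callers; the exact call sites
 * live in the per-protocol files, and dev_parms is hypothetical):
 *
 *	neigh_sysctl_register(dev, dev_parms, NULL);
 *	neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
 *
 * The first yields /proc/sys/net/ipv4/neigh/<dev>/... per device; the
 * second yields /proc/sys/net/ipv4/neigh/default/... with the gc_*
 * entries kept, matching the dev/dev_name_source branches above.
 */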
3246 
3247 void neigh_sysctl_unregister(struct neigh_parms *p)
3248 {
3249 	if (p->sysctl_table) {
3250 		struct neigh_sysctl_table *t = p->sysctl_table;
3251 		p->sysctl_table = NULL;
3252 		unregister_net_sysctl_table(t->sysctl_header);
3253 		kfree(t);
3254 	}
3255 }
3256 EXPORT_SYMBOL(neigh_sysctl_unregister);
3257 
3258 #endif	/* CONFIG_SYSCTL */
3259 
3260 static int __init neigh_init(void)
3261 {
3262 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3263 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3264 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3265 
3266 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3267 		      0);
3268 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3269 
3270 	return 0;
3271 }
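
/* Editorial note: neigh_add() and neigh_delete() are the doit handlers
 * defined earlier in this file; the RTM_GETNEIGH and RTM_GETNEIGHTBL
 * registrations pass a NULL doit because those requests are served
 * exclusively through the dump callbacks above.
 */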
3272 
3273 subsys_initcall(neigh_init);
3274 