/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans and updates of the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so results in deadlocks if the backend/driver itself needs
     to use the neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simplistic and to make
   no callbacks into neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);
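
/*
 * Example (illustrative sketch, not code used by this file): the safe
 * pattern for acting on an entry found during a bucket scan is to pin
 * it under tbl->lock, drop the lock, and only then do the real work:
 *
 *	write_lock_bh(&tbl->lock);
 *	... locate struct neighbour *n in its bucket ...
 *	neigh_hold(n);			(pin the entry)
 *	write_unlock_bh(&tbl->lock);	(never call out while locked)
 *	... transmit, call into drivers, etc. ...
 *	neigh_release(n);		(drop the pin when done)
 */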

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a genuinely reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
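
/*
 * Worked example (illustrative): with base = 30 * HZ, net_random() % base
 * lies in [0, 30*HZ) and base >> 1 adds 15*HZ, so the result is spread
 * over [15*HZ, 45*HZ) -- exactly the (1/2)*base ... (3/2)*base interval
 * described above.
 */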


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* A neighbour record may be discarded if both:
			 * - nobody references it, and
			 * - it is not permanent.
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The worst case: we must destroy the
				   neighbour entry, but someone still
				   holds a reference to it.

				   Destruction is deferred until the
				   last user releases the entry, but we
				   must kill the timers etc. and move
				   it to a safe state now.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
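
/*
 * Example (illustrative, mirroring what protocol code such as ARP does):
 * a successful neigh_lookup() returns the entry with its refcount
 * raised, so it must be paired with neigh_release():
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &ip, dev);
 *	if (n) {
 *		... use n ...
 *		neigh_release(n);
 *	}
 */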

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
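
/*
 * Example (illustrative): callers normally use the neigh_create()
 * wrapper, which is __neigh_create(tbl, pkey, dev, true). On failure
 * the result is an ERR_PTR(), never NULL, so check it with IS_ERR():
 *
 *	struct neighbour *n = neigh_create(&arp_tbl, &ip, dev);
 *	if (IS_ERR(n))
 *		return PTR_ERR(n);
 *	... use n, then neigh_release(n) ...
 */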

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
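
/*
 * Worked example (illustrative): pneigh_hash() loads the last four key
 * bytes as a u32 and xor-folds it by 16, 8 and 4 bits, so the low
 * nibble ends up as the XOR of all eight nibbles of the word; the
 * final mask then selects one of the 16 proxy buckets.
 */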

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be removed from the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
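
/*
 * Note (illustrative): normal users never call neigh_destroy()
 * directly; they drop their reference with neigh_release(), whose
 * final decrement on an unlinked (dead) entry triggers destruction:
 *
 *	neigh_release(n);	(calls neigh_destroy(n) when refcnt hits 0)
 */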

/* Neighbour state is suspicious;
   disable fast path.

   Called with neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	/*
	 *	Periodically recompute ReachableTime from the random function.
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release the lock here, even if the hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE) ?
		p->ucast_probes :
		p->ucast_probes + p->app_probes + p->mcast_probes;
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate spot: error_report is a complicated
	   routine. In particular, it can hit this same neighbour
	   entry!

	   So we tread carefully here to avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

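/* A sketch of the transitions driven below (illustrative):
 *
 *	REACHABLE --(confirmation aged, used recently)--> DELAY
 *	REACHABLE --(confirmation aged, not used)-------> STALE
 *	DELAY     --(confirmed in time)-----------------> REACHABLE
 *	DELAY     --(no confirmation)-------------------> PROBE
 *	PROBE/INCOMPLETE --(too many probes)------------> FAILED
 */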
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
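
/*
 * Example (illustrative): output paths reach this through the
 * neigh_event_send() wrapper, as neigh_resolve_output() does below:
 *
 *	if (!neigh_event_send(neigh, skb)) {
 *		... the entry is usable: build the header and transmit ...
 *	}
 *
 * A non-zero return means the skb was queued (or dropped) while
 * resolution proceeds.
 */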

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different. It also allows
				retaining the current state if the
				lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				to be a router.

   The caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one. */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid an endless loop if something went wrong. */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
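
/*
 * Example (illustrative, mirroring the netlink handlers below): an
 * administrative delete forces an entry to NUD_FAILED regardless of
 * its current state:
 *
 *	neigh_update(neigh, NULL, NUD_FAILED,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 */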

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old dev_queue_xmit
 * worked, e.g. if you want to override the normal output path (eql, shaper),
 * but resolution has not been made yet.
 */

int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb_network_offset(skb));

	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev->header_ops->rebuild(skb))
		return 0;

	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
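
/*
 * Example (illustrative): a protocol registers its table once at init
 * time, the way ARP does for AF_INET:
 *
 *	static struct neigh_table arp_tbl = {
 *		.family		= AF_INET,
 *		.key_len	= 4,
 *		.hash		= arp_hash,
 *		.constructor	= arp_constructor,
 *		...
 *	};
 *
 *	neigh_table_init(&arp_tbl);
 */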

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* This is not clean... Fix it so the IPv6 module can be unloaded safely. */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  parms->base_reachable_time) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  parms->delay_probe_time) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1805 
1806 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1807 			      u32 pid, u32 seq, int type, int flags)
1808 {
1809 	struct nlmsghdr *nlh;
1810 	struct ndtmsg *ndtmsg;
1811 
1812 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1813 	if (nlh == NULL)
1814 		return -EMSGSIZE;
1815 
1816 	ndtmsg = nlmsg_data(nlh);
1817 
1818 	read_lock_bh(&tbl->lock);
1819 	ndtmsg->ndtm_family = tbl->family;
1820 	ndtmsg->ndtm_pad1   = 0;
1821 	ndtmsg->ndtm_pad2   = 0;
1822 
1823 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1824 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1825 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1826 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1827 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1828 		goto nla_put_failure;
1829 	{
1830 		unsigned long now = jiffies;
1831 		unsigned int flush_delta = now - tbl->last_flush;
1832 		unsigned int rand_delta = now - tbl->last_rand;
1833 		struct neigh_hash_table *nht;
1834 		struct ndt_config ndc = {
1835 			.ndtc_key_len		= tbl->key_len,
1836 			.ndtc_entry_size	= tbl->entry_size,
1837 			.ndtc_entries		= atomic_read(&tbl->entries),
1838 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1839 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1840 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1841 		};
1842 
1843 		rcu_read_lock_bh();
1844 		nht = rcu_dereference_bh(tbl->nht);
1845 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1846 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1847 		rcu_read_unlock_bh();
1848 
1849 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1850 			goto nla_put_failure;
1851 	}
1852 
1853 	{
1854 		int cpu;
1855 		struct ndt_stats ndst;
1856 
1857 		memset(&ndst, 0, sizeof(ndst));
1858 
1859 		for_each_possible_cpu(cpu) {
1860 			struct neigh_statistics	*st;
1861 
1862 			st = per_cpu_ptr(tbl->stats, cpu);
1863 			ndst.ndts_allocs		+= st->allocs;
1864 			ndst.ndts_destroys		+= st->destroys;
1865 			ndst.ndts_hash_grows		+= st->hash_grows;
1866 			ndst.ndts_res_failed		+= st->res_failed;
1867 			ndst.ndts_lookups		+= st->lookups;
1868 			ndst.ndts_hits			+= st->hits;
1869 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1870 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1871 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1872 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1873 		}
1874 
1875 		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1876 			goto nla_put_failure;
1877 	}
1878 
1879 	BUG_ON(tbl->parms.dev);
1880 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1881 		goto nla_put_failure;
1882 
1883 	read_unlock_bh(&tbl->lock);
1884 	return nlmsg_end(skb, nlh);
1885 
1886 nla_put_failure:
1887 	read_unlock_bh(&tbl->lock);
1888 	nlmsg_cancel(skb, nlh);
1889 	return -EMSGSIZE;
1890 }
1891 
1892 static int neightbl_fill_param_info(struct sk_buff *skb,
1893 				    struct neigh_table *tbl,
1894 				    struct neigh_parms *parms,
1895 				    u32 pid, u32 seq, int type,
1896 				    unsigned int flags)
1897 {
1898 	struct ndtmsg *ndtmsg;
1899 	struct nlmsghdr *nlh;
1900 
1901 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1902 	if (nlh == NULL)
1903 		return -EMSGSIZE;
1904 
1905 	ndtmsg = nlmsg_data(nlh);
1906 
1907 	read_lock_bh(&tbl->lock);
1908 	ndtmsg->ndtm_family = tbl->family;
1909 	ndtmsg->ndtm_pad1   = 0;
1910 	ndtmsg->ndtm_pad2   = 0;
1911 
1912 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1913 	    neightbl_fill_parms(skb, parms) < 0)
1914 		goto errout;
1915 
1916 	read_unlock_bh(&tbl->lock);
1917 	return nlmsg_end(skb, nlh);
1918 errout:
1919 	read_unlock_bh(&tbl->lock);
1920 	nlmsg_cancel(skb, nlh);
1921 	return -EMSGSIZE;
1922 }
1923 
1924 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1925 	[NDTA_NAME]		= { .type = NLA_STRING },
1926 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1927 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1928 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1929 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1930 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1931 };
1932 
1933 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1934 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1935 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1936 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1937 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1938 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1939 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1940 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1941 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1942 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1943 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1944 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1945 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1946 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1947 };
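
/*
 * Attribute layout accepted by neightbl_set() below.  As an illustration,
 * "ip ntable change name arp_cache dev eth0 queue 8" (iproute2) sends
 * roughly:
 *
 *	RTM_SETNEIGHTBL, struct ndtmsg { .ndtm_family = AF_INET }
 *	  NDTA_NAME	"arp_cache"
 *	  NDTA_PARMS	(nested)
 *	    NDTPA_IFINDEX	<ifindex of eth0>
 *	    NDTPA_QUEUE_LEN	8
 *
 * (A sketch only; the exact attribute set depends on the iproute2
 * version.)
 */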
1948 
1949 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1950 {
1951 	struct net *net = sock_net(skb->sk);
1952 	struct neigh_table *tbl;
1953 	struct ndtmsg *ndtmsg;
1954 	struct nlattr *tb[NDTA_MAX+1];
1955 	int err;
1956 
1957 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1958 			  nl_neightbl_policy);
1959 	if (err < 0)
1960 		goto errout;
1961 
1962 	if (tb[NDTA_NAME] == NULL) {
1963 		err = -EINVAL;
1964 		goto errout;
1965 	}
1966 
1967 	ndtmsg = nlmsg_data(nlh);
1968 	read_lock(&neigh_tbl_lock);
1969 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1970 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1971 			continue;
1972 
1973 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1974 			break;
1975 	}
1976 
1977 	if (tbl == NULL) {
1978 		err = -ENOENT;
1979 		goto errout_locked;
1980 	}
1981 
1982 	/*
1983 	 * We acquire tbl->lock to be nice to the periodic timers and
1984 	 * make sure they always see a consistent set of values.
1985 	 */
1986 	write_lock_bh(&tbl->lock);
1987 
1988 	if (tb[NDTA_PARMS]) {
1989 		struct nlattr *tbp[NDTPA_MAX+1];
1990 		struct neigh_parms *p;
1991 		int i, ifindex = 0;
1992 
1993 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1994 				       nl_ntbl_parm_policy);
1995 		if (err < 0)
1996 			goto errout_tbl_lock;
1997 
1998 		if (tbp[NDTPA_IFINDEX])
1999 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2000 
2001 		p = lookup_neigh_parms(tbl, net, ifindex);
2002 		if (p == NULL) {
2003 			err = -ENOENT;
2004 			goto errout_tbl_lock;
2005 		}
2006 
2007 		for (i = 1; i <= NDTPA_MAX; i++) {
2008 			if (tbp[i] == NULL)
2009 				continue;
2010 
2011 			switch (i) {
2012 			case NDTPA_QUEUE_LEN:
2013 				p->queue_len_bytes = nla_get_u32(tbp[i]) *
2014 						     SKB_TRUESIZE(ETH_FRAME_LEN);
2015 				break;
2016 			case NDTPA_QUEUE_LENBYTES:
2017 				p->queue_len_bytes = nla_get_u32(tbp[i]);
2018 				break;
2019 			case NDTPA_PROXY_QLEN:
2020 				p->proxy_qlen = nla_get_u32(tbp[i]);
2021 				break;
2022 			case NDTPA_APP_PROBES:
2023 				p->app_probes = nla_get_u32(tbp[i]);
2024 				break;
2025 			case NDTPA_UCAST_PROBES:
2026 				p->ucast_probes = nla_get_u32(tbp[i]);
2027 				break;
2028 			case NDTPA_MCAST_PROBES:
2029 				p->mcast_probes = nla_get_u32(tbp[i]);
2030 				break;
2031 			case NDTPA_BASE_REACHABLE_TIME:
2032 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2033 				break;
2034 			case NDTPA_GC_STALETIME:
2035 				p->gc_staletime = nla_get_msecs(tbp[i]);
2036 				break;
2037 			case NDTPA_DELAY_PROBE_TIME:
2038 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2039 				break;
2040 			case NDTPA_RETRANS_TIME:
2041 				p->retrans_time = nla_get_msecs(tbp[i]);
2042 				break;
2043 			case NDTPA_ANYCAST_DELAY:
2044 				p->anycast_delay = nla_get_msecs(tbp[i]);
2045 				break;
2046 			case NDTPA_PROXY_DELAY:
2047 				p->proxy_delay = nla_get_msecs(tbp[i]);
2048 				break;
2049 			case NDTPA_LOCKTIME:
2050 				p->locktime = nla_get_msecs(tbp[i]);
2051 				break;
2052 			}
2053 		}
2054 	}
2055 
2056 	err = -ENOENT;
2057 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2058 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2059 	    !net_eq(net, &init_net))
2060 		goto errout_tbl_lock;
2061 
2062 	if (tb[NDTA_THRESH1])
2063 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2064 
2065 	if (tb[NDTA_THRESH2])
2066 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2067 
2068 	if (tb[NDTA_THRESH3])
2069 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2070 
2071 	if (tb[NDTA_GC_INTERVAL])
2072 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2073 
2074 	err = 0;
2075 
2076 errout_tbl_lock:
2077 	write_unlock_bh(&tbl->lock);
2078 errout_locked:
2079 	read_unlock(&neigh_tbl_lock);
2080 errout:
2081 	return err;
2082 }
2083 
2084 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2085 {
2086 	struct net *net = sock_net(skb->sk);
2087 	int family, tidx, nidx = 0;
2088 	int tbl_skip = cb->args[0];
2089 	int neigh_skip = cb->args[1];
2090 	struct neigh_table *tbl;
2091 
2092 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2093 
2094 	read_lock(&neigh_tbl_lock);
2095 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2096 		struct neigh_parms *p;
2097 
2098 		if (tidx < tbl_skip || (family && tbl->family != family))
2099 			continue;
2100 
2101 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2102 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2103 				       NLM_F_MULTI) <= 0)
2104 			break;
2105 
2106 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2107 			if (!net_eq(neigh_parms_net(p), net))
2108 				continue;
2109 
2110 			if (nidx < neigh_skip)
2111 				goto next;
2112 
2113 			if (neightbl_fill_param_info(skb, tbl, p,
2114 						     NETLINK_CB(cb->skb).portid,
2115 						     cb->nlh->nlmsg_seq,
2116 						     RTM_NEWNEIGHTBL,
2117 						     NLM_F_MULTI) <= 0)
2118 				goto out;
2119 		next:
2120 			nidx++;
2121 		}
2122 
2123 		neigh_skip = 0;
2124 	}
2125 out:
2126 	read_unlock(&neigh_tbl_lock);
2127 	cb->args[0] = tidx;
2128 	cb->args[1] = nidx;
2129 
2130 	return skb->len;
2131 }
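
/*
 * Resume state for the dump above: cb->args[0] holds the index of the
 * next neigh_table to visit and cb->args[1] the index of the next
 * per-device parms entry inside it, so a dump that fills the skb picks
 * up where it stopped on the following recvmsg().
 */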
2132 
2133 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2134 			   u32 pid, u32 seq, int type, unsigned int flags)
2135 {
2136 	unsigned long now = jiffies;
2137 	struct nda_cacheinfo ci;
2138 	struct nlmsghdr *nlh;
2139 	struct ndmsg *ndm;
2140 
2141 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2142 	if (nlh == NULL)
2143 		return -EMSGSIZE;
2144 
2145 	ndm = nlmsg_data(nlh);
2146 	ndm->ndm_family	 = neigh->ops->family;
2147 	ndm->ndm_pad1    = 0;
2148 	ndm->ndm_pad2    = 0;
2149 	ndm->ndm_flags	 = neigh->flags;
2150 	ndm->ndm_type	 = neigh->type;
2151 	ndm->ndm_ifindex = neigh->dev->ifindex;
2152 
2153 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2154 		goto nla_put_failure;
2155 
2156 	read_lock_bh(&neigh->lock);
2157 	ndm->ndm_state	 = neigh->nud_state;
2158 	if (neigh->nud_state & NUD_VALID) {
2159 		char haddr[MAX_ADDR_LEN];
2160 
2161 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2162 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2163 			read_unlock_bh(&neigh->lock);
2164 			goto nla_put_failure;
2165 		}
2166 	}
2167 
2168 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2169 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2170 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2171 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2172 	read_unlock_bh(&neigh->lock);
2173 
2174 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2175 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2176 		goto nla_put_failure;
2177 
2178 	return nlmsg_end(skb, nlh);
2179 
2180 nla_put_failure:
2181 	nlmsg_cancel(skb, nlh);
2182 	return -EMSGSIZE;
2183 }
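
/*
 * The RTM_NEWNEIGH message built above maps one to one onto "ip neigh
 * show" output, e.g. (illustrative):
 *
 *	192.0.2.1 dev eth0 lladdr 52:54:00:12:34:56 REACHABLE
 *
 * NDA_DST is the printed address, NDA_LLADDR the "lladdr" value (only
 * emitted while nud_state is NUD_VALID), and ndm_state selects the
 * trailing REACHABLE/STALE/... keyword.
 */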
2184 
2185 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2186 			    u32 pid, u32 seq, int type, unsigned int flags,
2187 			    struct neigh_table *tbl)
2188 {
2189 	struct nlmsghdr *nlh;
2190 	struct ndmsg *ndm;
2191 
2192 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2193 	if (nlh == NULL)
2194 		return -EMSGSIZE;
2195 
2196 	ndm = nlmsg_data(nlh);
2197 	ndm->ndm_family	 = tbl->family;
2198 	ndm->ndm_pad1    = 0;
2199 	ndm->ndm_pad2    = 0;
2200 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2201 	ndm->ndm_type	 = RTN_UNICAST; /* an RTN_* route type; NDA_DST is an attribute id that merely shares the value 1 */
2202 	ndm->ndm_ifindex = pn->dev->ifindex;
2203 	ndm->ndm_state	 = NUD_NONE;
2204 
2205 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2206 		goto nla_put_failure;
2207 
2208 	return nlmsg_end(skb, nlh);
2209 
2210 nla_put_failure:
2211 	nlmsg_cancel(skb, nlh);
2212 	return -EMSGSIZE;
2213 }
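
/*
 * Proxy entries carry no link-layer address and run no state machine,
 * hence the fixed NUD_NONE and the NTF_PROXY flag above.  They are
 * created with e.g. "ip neigh add proxy 192.0.2.2 dev eth0" and dumped
 * via the proxy branch of neigh_dump_info() below.
 */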
2214 
2215 static void neigh_update_notify(struct neighbour *neigh)
2216 {
2217 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2218 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2219 }
2220 
2221 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2222 			    struct netlink_callback *cb)
2223 {
2224 	struct net *net = sock_net(skb->sk);
2225 	struct neighbour *n;
2226 	int rc, h, s_h = cb->args[1];
2227 	int idx, s_idx = idx = cb->args[2];
2228 	struct neigh_hash_table *nht;
2229 
2230 	rcu_read_lock_bh();
2231 	nht = rcu_dereference_bh(tbl->nht);
2232 
2233 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2234 		if (h > s_h)
2235 			s_idx = 0;
2236 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2237 		     n != NULL;
2238 		     n = rcu_dereference_bh(n->next)) {
2239 			if (!net_eq(dev_net(n->dev), net))
2240 				continue;
2241 			if (idx < s_idx)
2242 				goto next;
2243 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2244 					    cb->nlh->nlmsg_seq,
2245 					    RTM_NEWNEIGH,
2246 					    NLM_F_MULTI) <= 0) {
2247 				rc = -1;
2248 				goto out;
2249 			}
2250 next:
2251 			idx++;
2252 		}
2253 	}
2254 	rc = skb->len;
2255 out:
2256 	rcu_read_unlock_bh();
2257 	cb->args[1] = h;
2258 	cb->args[2] = idx;
2259 	return rc;
2260 }
2261 
2262 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2263 			     struct netlink_callback *cb)
2264 {
2265 	struct pneigh_entry *n;
2266 	struct net *net = sock_net(skb->sk);
2267 	int rc, h, s_h = cb->args[3];
2268 	int idx, s_idx = idx = cb->args[4];
2269 
2270 	read_lock_bh(&tbl->lock);
2271 
2272 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2273 		if (h > s_h)
2274 			s_idx = 0;
2275 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2276 			if (!net_eq(pneigh_net(n), net))
2277 				continue;
2278 			if (idx < s_idx)
2279 				goto next;
2280 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2281 					    cb->nlh->nlmsg_seq,
2282 					    RTM_NEWNEIGH,
2283 					    NLM_F_MULTI, tbl) <= 0) {
2284 				read_unlock_bh(&tbl->lock);
2285 				rc = -1;
2286 				goto out;
2287 			}
2288 		next:
2289 			idx++;
2290 		}
2291 	}
2292 
2293 	read_unlock_bh(&tbl->lock);
2294 	rc = skb->len;
2295 out:
2296 	cb->args[3] = h;
2297 	cb->args[4] = idx;
2298 	return rc;
2299 
2300 }
2301 
2302 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2303 {
2304 	struct neigh_table *tbl;
2305 	int t, family, s_t;
2306 	int proxy = 0;
2307 	int err;
2308 
2309 	read_lock(&neigh_tbl_lock);
2310 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2311 
2312 	/* Check whether a full ndmsg structure is present; the family
2313 	 * member sits at the same offset in both ndmsg and rtgenmsg.
2314 	 */
2315 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2316 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2317 		proxy = 1;
2318 
2319 	s_t = cb->args[0];
2320 
2321 	for (tbl = neigh_tables, t = 0; tbl;
2322 	     tbl = tbl->next, t++) {
2323 		if (t < s_t || (family && tbl->family != family))
2324 			continue;
2325 		if (t > s_t)
2326 			memset(&cb->args[1], 0, sizeof(cb->args) -
2327 						sizeof(cb->args[0]));
2328 		if (proxy)
2329 			err = pneigh_dump_table(tbl, skb, cb);
2330 		else
2331 			err = neigh_dump_table(tbl, skb, cb);
2332 		if (err < 0)
2333 			break;
2334 	}
2335 	read_unlock(&neigh_tbl_lock);
2336 
2337 	cb->args[0] = t;
2338 	return skb->len;
2339 }
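
/*
 * A dumper selects the proxy tables by sending a full struct ndmsg in
 * place of the bare rtgenmsg, e.g. (sketch):
 *
 *	struct ndmsg ndm = {
 *		.ndm_family = AF_INET,
 *		.ndm_flags  = NTF_PROXY,
 *	};
 *
 * appended to an RTM_GETNEIGH + NLM_F_DUMP request header.
 */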
2340 
2341 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2342 {
2343 	int chain;
2344 	struct neigh_hash_table *nht;
2345 
2346 	rcu_read_lock_bh();
2347 	nht = rcu_dereference_bh(tbl->nht);
2348 
2349 	read_lock(&tbl->lock); /* avoid resizes */
2350 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2351 		struct neighbour *n;
2352 
2353 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2354 		     n != NULL;
2355 		     n = rcu_dereference_bh(n->next))
2356 			cb(n, cookie);
2357 	}
2358 	read_unlock(&tbl->lock);
2359 	rcu_read_unlock_bh();
2360 }
2361 EXPORT_SYMBOL(neigh_for_each);
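
/*
 * Hypothetical usage sketch for neigh_for_each(); the callback runs
 * under tbl->lock with BH disabled, so it must not sleep:
 *
 *	static void count_entry(struct neighbour *n, void *cookie)
 *	{
 *		(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, count_entry, &count);
 */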
2362 
2363 /* The tbl->lock must be held as a writer and BH disabled. */
2364 void __neigh_for_each_release(struct neigh_table *tbl,
2365 			      int (*cb)(struct neighbour *))
2366 {
2367 	int chain;
2368 	struct neigh_hash_table *nht;
2369 
2370 	nht = rcu_dereference_protected(tbl->nht,
2371 					lockdep_is_held(&tbl->lock));
2372 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2373 		struct neighbour *n;
2374 		struct neighbour __rcu **np;
2375 
2376 		np = &nht->hash_buckets[chain];
2377 		while ((n = rcu_dereference_protected(*np,
2378 					lockdep_is_held(&tbl->lock))) != NULL) {
2379 			int release;
2380 
2381 			write_lock(&n->lock);
2382 			release = cb(n);
2383 			if (release) {
2384 				rcu_assign_pointer(*np,
2385 					rcu_dereference_protected(n->next,
2386 						lockdep_is_held(&tbl->lock)));
2387 				n->dead = 1;
2388 			} else
2389 				np = &n->next;
2390 			write_unlock(&n->lock);
2391 			if (release)
2392 				neigh_cleanup_and_release(n);
2393 		}
2394 	}
2395 }
2396 EXPORT_SYMBOL(__neigh_for_each_release);
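
/*
 * Hypothetical usage sketch for __neigh_for_each_release(); a nonzero
 * return from the callback unlinks and releases the entry:
 *
 *	static int release_if_dev_down(struct neighbour *n)
 *	{
 *		return !(n->dev->flags & IFF_UP);
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, release_if_dev_down);
 *	write_unlock_bh(&tbl->lock);
 */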
2397 
2398 #ifdef CONFIG_PROC_FS
2399 
2400 static struct neighbour *neigh_get_first(struct seq_file *seq)
2401 {
2402 	struct neigh_seq_state *state = seq->private;
2403 	struct net *net = seq_file_net(seq);
2404 	struct neigh_hash_table *nht = state->nht;
2405 	struct neighbour *n = NULL;
2406 	int bucket;
2407 
2408 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2409 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2410 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2411 
2412 		while (n) {
2413 			if (!net_eq(dev_net(n->dev), net))
2414 				goto next;
2415 			if (state->neigh_sub_iter) {
2416 				loff_t fakep = 0;
2417 				void *v;
2418 
2419 				v = state->neigh_sub_iter(state, n, &fakep);
2420 				if (!v)
2421 					goto next;
2422 			}
2423 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2424 				break;
2425 			if (n->nud_state & ~NUD_NOARP)
2426 				break;
2427 next:
2428 			n = rcu_dereference_bh(n->next);
2429 		}
2430 
2431 		if (n)
2432 			break;
2433 	}
2434 	state->bucket = bucket;
2435 
2436 	return n;
2437 }
2438 
2439 static struct neighbour *neigh_get_next(struct seq_file *seq,
2440 					struct neighbour *n,
2441 					loff_t *pos)
2442 {
2443 	struct neigh_seq_state *state = seq->private;
2444 	struct net *net = seq_file_net(seq);
2445 	struct neigh_hash_table *nht = state->nht;
2446 
2447 	if (state->neigh_sub_iter) {
2448 		void *v = state->neigh_sub_iter(state, n, pos);
2449 		if (v)
2450 			return n;
2451 	}
2452 	n = rcu_dereference_bh(n->next);
2453 
2454 	while (1) {
2455 		while (n) {
2456 			if (!net_eq(dev_net(n->dev), net))
2457 				goto next;
2458 			if (state->neigh_sub_iter) {
2459 				void *v = state->neigh_sub_iter(state, n, pos);
2460 				if (v)
2461 					return n;
2462 				goto next;
2463 			}
2464 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2465 				break;
2466 
2467 			if (n->nud_state & ~NUD_NOARP)
2468 				break;
2469 next:
2470 			n = rcu_dereference_bh(n->next);
2471 		}
2472 
2473 		if (n)
2474 			break;
2475 
2476 		if (++state->bucket >= (1 << nht->hash_shift))
2477 			break;
2478 
2479 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2480 	}
2481 
2482 	if (n && pos)
2483 		--(*pos);
2484 	return n;
2485 }
2486 
2487 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2488 {
2489 	struct neighbour *n = neigh_get_first(seq);
2490 
2491 	if (n) {
2492 		--(*pos);
2493 		while (*pos) {
2494 			n = neigh_get_next(seq, n, pos);
2495 			if (!n)
2496 				break;
2497 		}
2498 	}
2499 	return *pos ? NULL : n;
2500 }
2501 
2502 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2503 {
2504 	struct neigh_seq_state *state = seq->private;
2505 	struct net *net = seq_file_net(seq);
2506 	struct neigh_table *tbl = state->tbl;
2507 	struct pneigh_entry *pn = NULL;
2508 	int bucket;
2509 
2510 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2511 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2512 		pn = tbl->phash_buckets[bucket];
2513 		while (pn && !net_eq(pneigh_net(pn), net))
2514 			pn = pn->next;
2515 		if (pn)
2516 			break;
2517 	}
2518 	state->bucket = bucket;
2519 
2520 	return pn;
2521 }
2522 
2523 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2524 					    struct pneigh_entry *pn,
2525 					    loff_t *pos)
2526 {
2527 	struct neigh_seq_state *state = seq->private;
2528 	struct net *net = seq_file_net(seq);
2529 	struct neigh_table *tbl = state->tbl;
2530 
2531 	do {
2532 		pn = pn->next;
2533 	} while (pn && !net_eq(pneigh_net(pn), net));
2534 
2535 	while (!pn) {
2536 		if (++state->bucket > PNEIGH_HASHMASK)
2537 			break;
2538 		pn = tbl->phash_buckets[state->bucket];
2539 		while (pn && !net_eq(pneigh_net(pn), net))
2540 			pn = pn->next;
2541 		if (pn)
2542 			break;
2543 	}
2544 
2545 	if (pn && pos)
2546 		--(*pos);
2547 
2548 	return pn;
2549 }
2550 
2551 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2552 {
2553 	struct pneigh_entry *pn = pneigh_get_first(seq);
2554 
2555 	if (pn) {
2556 		--(*pos);
2557 		while (*pos) {
2558 			pn = pneigh_get_next(seq, pn, pos);
2559 			if (!pn)
2560 				break;
2561 		}
2562 	}
2563 	return *pos ? NULL : pn;
2564 }
2565 
2566 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2567 {
2568 	struct neigh_seq_state *state = seq->private;
2569 	void *rc;
2570 	loff_t idxpos = *pos;
2571 
2572 	rc = neigh_get_idx(seq, &idxpos);
2573 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2574 		rc = pneigh_get_idx(seq, &idxpos);
2575 
2576 	return rc;
2577 }
2578 
2579 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2580 	__acquires(rcu_bh)
2581 {
2582 	struct neigh_seq_state *state = seq->private;
2583 
2584 	state->tbl = tbl;
2585 	state->bucket = 0;
2586 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2587 
2588 	rcu_read_lock_bh();
2589 	state->nht = rcu_dereference_bh(tbl->nht);
2590 
2591 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2592 }
2593 EXPORT_SYMBOL(neigh_seq_start);
2594 
2595 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2596 {
2597 	struct neigh_seq_state *state;
2598 	void *rc;
2599 
2600 	if (v == SEQ_START_TOKEN) {
2601 		rc = neigh_get_first(seq);
2602 		goto out;
2603 	}
2604 
2605 	state = seq->private;
2606 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2607 		rc = neigh_get_next(seq, v, NULL);
2608 		if (rc)
2609 			goto out;
2610 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2611 			rc = pneigh_get_first(seq);
2612 	} else {
2613 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2614 		rc = pneigh_get_next(seq, v, NULL);
2615 	}
2616 out:
2617 	++(*pos);
2618 	return rc;
2619 }
2620 EXPORT_SYMBOL(neigh_seq_next);
2621 
2622 void neigh_seq_stop(struct seq_file *seq, void *v)
2623 	__releases(rcu_bh)
2624 {
2625 	rcu_read_unlock_bh();
2626 }
2627 EXPORT_SYMBOL(neigh_seq_stop);
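
/*
 * These three helpers implement the seq_file iterator contract for the
 * per-protocol /proc files.  ARP's /proc/net/arp, for instance, starts
 * its walk with (paraphrased from net/ipv4/arp.c):
 *
 *	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
 *
 * so NUD_NOARP entries never appear in "arp -a" style listings.
 */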
2628 
2629 /* statistics via seq_file */
2630 
2631 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2632 {
2633 	struct neigh_table *tbl = seq->private;
2634 	int cpu;
2635 
2636 	if (*pos == 0)
2637 		return SEQ_START_TOKEN;
2638 
2639 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2640 		if (!cpu_possible(cpu))
2641 			continue;
2642 		*pos = cpu+1;
2643 		return per_cpu_ptr(tbl->stats, cpu);
2644 	}
2645 	return NULL;
2646 }
2647 
2648 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2649 {
2650 	struct neigh_table *tbl = seq->private;
2651 	int cpu;
2652 
2653 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2654 		if (!cpu_possible(cpu))
2655 			continue;
2656 		*pos = cpu+1;
2657 		return per_cpu_ptr(tbl->stats, cpu);
2658 	}
2659 	return NULL;
2660 }
2661 
2662 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2663 {
2664 
2665 }
2666 
2667 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2668 {
2669 	struct neigh_table *tbl = seq->private;
2670 	struct neigh_statistics *st = v;
2671 
2672 	if (v == SEQ_START_TOKEN) {
2673 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2674 		return 0;
2675 	}
2676 
2677 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2678 			"%08lx %08lx  %08lx %08lx %08lx\n",
2679 		   atomic_read(&tbl->entries),
2680 
2681 		   st->allocs,
2682 		   st->destroys,
2683 		   st->hash_grows,
2684 
2685 		   st->lookups,
2686 		   st->hits,
2687 
2688 		   st->res_failed,
2689 
2690 		   st->rcv_probes_mcast,
2691 		   st->rcv_probes_ucast,
2692 
2693 		   st->periodic_gc_runs,
2694 		   st->forced_gc_runs,
2695 		   st->unres_discards
2696 		   );
2697 
2698 	return 0;
2699 }
2700 
2701 static const struct seq_operations neigh_stat_seq_ops = {
2702 	.start	= neigh_stat_seq_start,
2703 	.next	= neigh_stat_seq_next,
2704 	.stop	= neigh_stat_seq_stop,
2705 	.show	= neigh_stat_seq_show,
2706 };
2707 
2708 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2709 {
2710 	int ret = seq_open(file, &neigh_stat_seq_ops);
2711 
2712 	if (!ret) {
2713 		struct seq_file *sf = file->private_data;
2714 		sf->private = PDE_DATA(inode);
2715 	}
2716 	return ret;
2717 }
2718 
2719 static const struct file_operations neigh_stat_seq_fops = {
2720 	.owner	 = THIS_MODULE,
2721 	.open 	 = neigh_stat_seq_open,
2722 	.read	 = seq_read,
2723 	.llseek	 = seq_lseek,
2724 	.release = seq_release,
2725 };
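
/*
 * These ops back /proc/net/stat/<tbl->id> (e.g. /proc/net/stat/arp_cache
 * and ndisc_cache): one header line, then one hex-formatted line per
 * possible CPU, as produced by neigh_stat_seq_show() above.
 */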
2726 
2727 #endif /* CONFIG_PROC_FS */
2728 
2729 static inline size_t neigh_nlmsg_size(void)
2730 {
2731 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2732 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2733 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2734 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2735 	       + nla_total_size(4); /* NDA_PROBES */
2736 }
2737 
2738 static void __neigh_notify(struct neighbour *n, int type, int flags)
2739 {
2740 	struct net *net = dev_net(n->dev);
2741 	struct sk_buff *skb;
2742 	int err = -ENOBUFS;
2743 
2744 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2745 	if (skb == NULL)
2746 		goto errout;
2747 
2748 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2749 	if (err < 0) {
2750 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2751 		WARN_ON(err == -EMSGSIZE);
2752 		kfree_skb(skb);
2753 		goto errout;
2754 	}
2755 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2756 	return;
2757 errout:
2758 	if (err < 0)
2759 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2760 }
2761 
2762 void neigh_app_ns(struct neighbour *n)
2763 {
2764 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2765 }
2766 EXPORT_SYMBOL(neigh_app_ns);
2767 
2768 #ifdef CONFIG_SYSCTL
2769 static int zero;
2770 static int int_max = INT_MAX;
2771 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2772 
2773 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2774 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2775 {
2776 	int size, ret;
2777 	struct ctl_table tmp = *ctl;
2778 
2779 	tmp.extra1 = &zero;
2780 	tmp.extra2 = &unres_qlen_max;
2781 	tmp.data = &size;
2782 
2783 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2784 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2785 
2786 	if (write && !ret)
2787 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2788 	return ret;
2789 }
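
/*
 * Worked example for the conversion above, assuming SKB_TRUESIZE of an
 * ETH_FRAME_LEN (1514 byte) frame is roughly 2K on a 64-bit build:
 * writing unres_qlen=3 stores 3 * SKB_TRUESIZE(1514) in queue_len_bytes,
 * and a later read divides back to 3.  Since the read rounds down, a
 * value set through unres_qlen_bytes that is not a multiple of the
 * truesize reads back smaller through unres_qlen.
 */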
2790 
2791 enum {
2792 	NEIGH_VAR_MCAST_PROBE,
2793 	NEIGH_VAR_UCAST_PROBE,
2794 	NEIGH_VAR_APP_PROBE,
2795 	NEIGH_VAR_RETRANS_TIME,
2796 	NEIGH_VAR_BASE_REACHABLE_TIME,
2797 	NEIGH_VAR_DELAY_PROBE_TIME,
2798 	NEIGH_VAR_GC_STALETIME,
2799 	NEIGH_VAR_QUEUE_LEN,
2800 	NEIGH_VAR_QUEUE_LEN_BYTES,
2801 	NEIGH_VAR_PROXY_QLEN,
2802 	NEIGH_VAR_ANYCAST_DELAY,
2803 	NEIGH_VAR_PROXY_DELAY,
2804 	NEIGH_VAR_LOCKTIME,
2805 	NEIGH_VAR_RETRANS_TIME_MS,
2806 	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2807 	NEIGH_VAR_GC_INTERVAL,
2808 	NEIGH_VAR_GC_THRESH1,
2809 	NEIGH_VAR_GC_THRESH2,
2810 	NEIGH_VAR_GC_THRESH3,
2811 	NEIGH_VAR_MAX
2812 };
2813 
2814 static struct neigh_sysctl_table {
2815 	struct ctl_table_header *sysctl_header;
2816 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2817 } neigh_sysctl_template __read_mostly = {
2818 	.neigh_vars = {
2819 		[NEIGH_VAR_MCAST_PROBE] = {
2820 			.procname	= "mcast_solicit",
2821 			.maxlen		= sizeof(int),
2822 			.mode		= 0644,
2823 			.extra1 	= &zero,
2824 			.extra2		= &int_max,
2825 			.proc_handler	= proc_dointvec_minmax,
2826 		},
2827 		[NEIGH_VAR_UCAST_PROBE] = {
2828 			.procname	= "ucast_solicit",
2829 			.maxlen		= sizeof(int),
2830 			.mode		= 0644,
2831 			.extra1 	= &zero,
2832 			.extra2		= &int_max,
2833 			.proc_handler	= proc_dointvec_minmax,
2834 		},
2835 		[NEIGH_VAR_APP_PROBE] = {
2836 			.procname	= "app_solicit",
2837 			.maxlen		= sizeof(int),
2838 			.mode		= 0644,
2839 			.extra1 	= &zero,
2840 			.extra2		= &int_max,
2841 			.proc_handler	= proc_dointvec_minmax,
2842 		},
2843 		[NEIGH_VAR_RETRANS_TIME] = {
2844 			.procname	= "retrans_time",
2845 			.maxlen		= sizeof(int),
2846 			.mode		= 0644,
2847 			.proc_handler	= proc_dointvec_userhz_jiffies,
2848 		},
2849 		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
2850 			.procname	= "base_reachable_time",
2851 			.maxlen		= sizeof(int),
2852 			.mode		= 0644,
2853 			.proc_handler	= proc_dointvec_jiffies,
2854 		},
2855 		[NEIGH_VAR_DELAY_PROBE_TIME] = {
2856 			.procname	= "delay_first_probe_time",
2857 			.maxlen		= sizeof(int),
2858 			.mode		= 0644,
2859 			.proc_handler	= proc_dointvec_jiffies,
2860 		},
2861 		[NEIGH_VAR_GC_STALETIME] = {
2862 			.procname	= "gc_stale_time",
2863 			.maxlen		= sizeof(int),
2864 			.mode		= 0644,
2865 			.proc_handler	= proc_dointvec_jiffies,
2866 		},
2867 		[NEIGH_VAR_QUEUE_LEN] = {
2868 			.procname	= "unres_qlen",
2869 			.maxlen		= sizeof(int),
2870 			.mode		= 0644,
2871 			.proc_handler	= proc_unres_qlen,
2872 		},
2873 		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
2874 			.procname	= "unres_qlen_bytes",
2875 			.maxlen		= sizeof(int),
2876 			.mode		= 0644,
2877 			.extra1		= &zero,
2878 			.proc_handler   = proc_dointvec_minmax,
2879 		},
2880 		[NEIGH_VAR_PROXY_QLEN] = {
2881 			.procname	= "proxy_qlen",
2882 			.maxlen		= sizeof(int),
2883 			.mode		= 0644,
2884 			.extra1 	= &zero,
2885 			.extra2		= &int_max,
2886 			.proc_handler	= proc_dointvec_minmax,
2887 		},
2888 		[NEIGH_VAR_ANYCAST_DELAY] = {
2889 			.procname	= "anycast_delay",
2890 			.maxlen		= sizeof(int),
2891 			.mode		= 0644,
2892 			.proc_handler	= proc_dointvec_userhz_jiffies,
2893 		},
2894 		[NEIGH_VAR_PROXY_DELAY] = {
2895 			.procname	= "proxy_delay",
2896 			.maxlen		= sizeof(int),
2897 			.mode		= 0644,
2898 			.proc_handler	= proc_dointvec_userhz_jiffies,
2899 		},
2900 		[NEIGH_VAR_LOCKTIME] = {
2901 			.procname	= "locktime",
2902 			.maxlen		= sizeof(int),
2903 			.mode		= 0644,
2904 			.proc_handler	= proc_dointvec_userhz_jiffies,
2905 		},
2906 		[NEIGH_VAR_RETRANS_TIME_MS] = {
2907 			.procname	= "retrans_time_ms",
2908 			.maxlen		= sizeof(int),
2909 			.mode		= 0644,
2910 			.proc_handler	= proc_dointvec_ms_jiffies,
2911 		},
2912 		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2913 			.procname	= "base_reachable_time_ms",
2914 			.maxlen		= sizeof(int),
2915 			.mode		= 0644,
2916 			.proc_handler	= proc_dointvec_ms_jiffies,
2917 		},
2918 		[NEIGH_VAR_GC_INTERVAL] = {
2919 			.procname	= "gc_interval",
2920 			.maxlen		= sizeof(int),
2921 			.mode		= 0644,
2922 			.proc_handler	= proc_dointvec_jiffies,
2923 		},
2924 		[NEIGH_VAR_GC_THRESH1] = {
2925 			.procname	= "gc_thresh1",
2926 			.maxlen		= sizeof(int),
2927 			.mode		= 0644,
2928 			.extra1 	= &zero,
2929 			.extra2		= &int_max,
2930 			.proc_handler	= proc_dointvec_minmax,
2931 		},
2932 		[NEIGH_VAR_GC_THRESH2] = {
2933 			.procname	= "gc_thresh2",
2934 			.maxlen		= sizeof(int),
2935 			.mode		= 0644,
2936 			.extra1 	= &zero,
2937 			.extra2		= &int_max,
2938 			.proc_handler	= proc_dointvec_minmax,
2939 		},
2940 		[NEIGH_VAR_GC_THRESH3] = {
2941 			.procname	= "gc_thresh3",
2942 			.maxlen		= sizeof(int),
2943 			.mode		= 0644,
2944 			.extra1 	= &zero,
2945 			.extra2		= &int_max,
2946 			.proc_handler	= proc_dointvec_minmax,
2947 		},
2948 		{},
2949 	},
2950 };
2951 
2952 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2953 			  char *p_name, proc_handler *handler)
2954 {
2955 	struct neigh_sysctl_table *t;
2956 	const char *dev_name_source = NULL;
2957 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
2958 
2959 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2960 	if (!t)
2961 		goto err;
2962 
2963 	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2964 	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2965 	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2966 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2967 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2968 	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2969 	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2970 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2971 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2972 	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2973 	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2974 	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2975 	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2976 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2977 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2978 
2979 	if (dev) {
2980 		dev_name_source = dev->name;
2981 		/* Terminate the table early */
2982 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2983 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2984 	} else {
2985 		dev_name_source = "default";
2986 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2987 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2988 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2989 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2990 	}
2991 
2992 
2993 	if (handler) {
2994 		/* RetransTime */
2995 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2996 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2997 		/* ReachableTime */
2998 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2999 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
3000 		/* RetransTime (in milliseconds) */
3001 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3002 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
3003 		/* ReachableTime (in milliseconds) */
3004 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3005 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
3006 	}
3007 
3008 	/* Don't export sysctls to unprivileged users */
3009 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
3010 		t->neigh_vars[0].procname = NULL;
3011 
3012 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3013 		p_name, dev_name_source);
3014 	t->sysctl_header =
3015 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3016 	if (!t->sysctl_header)
3017 		goto free;
3018 
3019 	p->sysctl_table = t;
3020 	return 0;
3021 
3022 free:
3023 	kfree(t);
3024 err:
3025 	return -ENOBUFS;
3026 }
3027 EXPORT_SYMBOL(neigh_sysctl_register);
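
/*
 * Usage sketch: each address family registers one sysctl directory per
 * device plus a "default" one; IPv6's ndisc code, for example, calls
 * (paraphrased)
 *
 *	neigh_sysctl_register(dev, idev->nd_parms, "ipv6",
 *			      &ndisc_ifinfo_sysctl_change);
 *
 * creating /proc/sys/net/ipv6/neigh/<dev>/{retrans_time, ...}.  With
 * dev == NULL the directory becomes .../neigh/default/ and the gc_*
 * knobs are included as well.
 */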
3028 
3029 void neigh_sysctl_unregister(struct neigh_parms *p)
3030 {
3031 	if (p->sysctl_table) {
3032 		struct neigh_sysctl_table *t = p->sysctl_table;
3033 		p->sysctl_table = NULL;
3034 		unregister_net_sysctl_table(t->sysctl_header);
3035 		kfree(t);
3036 	}
3037 }
3038 EXPORT_SYMBOL(neigh_sysctl_unregister);
3039 
3040 #endif	/* CONFIG_SYSCTL */
3041 
3042 static int __init neigh_init(void)
3043 {
3044 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3045 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3046 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3047 
3048 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3049 		      NULL);
3050 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3051 
3052 	return 0;
3053 }
3054 
3055 subsys_initcall(neigh_init);
3056 
3057