xref: /openbmc/linux/net/core/neighbour.c (revision e7065e20)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 
40 #define NEIGH_DEBUG 1
41 
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { } while (0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46 
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55 
56 #define PNEIGH_HASHMASK		0xF
57 
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62 
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67 
68 /*
69    Neighbour hash table buckets are protected with the rwlock tbl->lock.
70
71    - All scans of and updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be done under this lock: no callbacks into
73      protocol backends, no attempts to send anything to the network.
74      Doing so will result in deadlocks if the backend/driver wants to
75      use the neighbour cache.
76    - If an entry requires some non-trivial action, increase its
77      reference count and release the table lock.
78
79    Neighbour entries are protected:
80    - by their reference count.
81    - by the rwlock neigh->lock.
82
83    The reference count prevents destruction.
84
85    neigh->lock mainly serializes the link-layer address data and its
86    validity state.  However, it also protects other entry fields:
87     - the timer
88     - the resolution queue
89
90    Again, nothing clever shall be done under neigh->lock; the most
91    complicated procedure we allow is dev->hard_header.  It is assumed
92    that dev->hard_header is simple and does not call back into the
93    neighbour tables.
94
95    The last lock is neigh_tbl_lock.  It is a pure SMP lock protecting the
96    list of neighbour tables.  This list is used only in process context.
97  */
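
/* Editorial sketch (not part of the original file): the reference-count
 * rule above in code form -- pin the entry under tbl->lock, drop the
 * lock, and only then do anything non-trivial.
 */
#if 0
static void example_nontrivial_work(struct neigh_table *tbl,
				    struct neighbour *n)
{
	write_lock_bh(&tbl->lock);
	neigh_hold(n);			/* pin the entry */
	write_unlock_bh(&tbl->lock);	/* no callbacks while locked! */

	/* ... non-trivial work: call into the driver, send packets ... */

	neigh_release(n);		/* may free the entry */
}
#endif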
98 
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100 
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103 	kfree_skb(skb);
104 	return -ENETDOWN;
105 }
106 
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109 	if (neigh->parms->neigh_cleanup)
110 		neigh->parms->neigh_cleanup(neigh);
111 
112 	__neigh_notify(neigh, RTM_DELNEIGH, 0);
113 	neigh_release(neigh);
114 }
115 
116 /*
117  * It returns a value randomly distributed in the interval
118  * (1/2)*base...(3/2)*base.  It corresponds to the default IPv6 settings
119  * and is not overridable, because it is a really reasonable choice.
120  */
121 
122 unsigned long neigh_rand_reach_time(unsigned long base)
123 {
124 	return base ? (net_random() % base) + (base >> 1) : 0;
125 }
126 EXPORT_SYMBOL(neigh_rand_reach_time);
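
/* Editorial example: with base = 30 * HZ the returned value is uniformly
 * distributed over [15 * HZ, 45 * HZ), i.e. ReachableTime is re-randomized
 * to between 15 and 45 seconds.
 */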
127 
128 
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131 	int shrunk = 0;
132 	int i;
133 	struct neigh_hash_table *nht;
134 
135 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136 
137 	write_lock_bh(&tbl->lock);
138 	nht = rcu_dereference_protected(tbl->nht,
139 					lockdep_is_held(&tbl->lock));
140 	for (i = 0; i < (1 << nht->hash_shift); i++) {
141 		struct neighbour *n;
142 		struct neighbour __rcu **np;
143 
144 		np = &nht->hash_buckets[i];
145 		while ((n = rcu_dereference_protected(*np,
146 					lockdep_is_held(&tbl->lock))) != NULL) {
147 			/* A neighbour record may be discarded if:
148 			 * - nobody refers to it.
149 			 * - it is not permanent.
150 			 */
151 			write_lock(&n->lock);
152 			if (atomic_read(&n->refcnt) == 1 &&
153 			    !(n->nud_state & NUD_PERMANENT)) {
154 				rcu_assign_pointer(*np,
155 					rcu_dereference_protected(n->next,
156 						  lockdep_is_held(&tbl->lock)));
157 				n->dead = 1;
158 				shrunk	= 1;
159 				write_unlock(&n->lock);
160 				neigh_cleanup_and_release(n);
161 				continue;
162 			}
163 			write_unlock(&n->lock);
164 			np = &n->next;
165 		}
166 	}
167 
168 	tbl->last_flush = jiffies;
169 
170 	write_unlock_bh(&tbl->lock);
171 
172 	return shrunk;
173 }
174 
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177 	neigh_hold(n);
178 	if (unlikely(mod_timer(&n->timer, when))) {
179 		printk("NEIGH: BUG, double timer add, state is %x\n",
180 		       n->nud_state);
181 		dump_stack();
182 	}
183 }
184 
185 static int neigh_del_timer(struct neighbour *n)
186 {
187 	if ((n->nud_state & NUD_IN_TIMER) &&
188 	    del_timer(&n->timer)) {
189 		neigh_release(n);
190 		return 1;
191 	}
192 	return 0;
193 }
194 
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197 	struct sk_buff *skb;
198 
199 	while ((skb = skb_dequeue(list)) != NULL) {
200 		dev_put(skb->dev);
201 		kfree_skb(skb);
202 	}
203 }
204 
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207 	int i;
208 	struct neigh_hash_table *nht;
209 
210 	nht = rcu_dereference_protected(tbl->nht,
211 					lockdep_is_held(&tbl->lock));
212 
213 	for (i = 0; i < (1 << nht->hash_shift); i++) {
214 		struct neighbour *n;
215 		struct neighbour __rcu **np = &nht->hash_buckets[i];
216 
217 		while ((n = rcu_dereference_protected(*np,
218 					lockdep_is_held(&tbl->lock))) != NULL) {
219 			if (dev && n->dev != dev) {
220 				np = &n->next;
221 				continue;
222 			}
223 			rcu_assign_pointer(*np,
224 				   rcu_dereference_protected(n->next,
225 						lockdep_is_held(&tbl->lock)));
226 			write_lock(&n->lock);
227 			neigh_del_timer(n);
228 			n->dead = 1;
229 
230 			if (atomic_read(&n->refcnt) != 1) {
231 				/* The most unpleasant situation:
232 				   we must destroy the neighbour entry,
233 				   but someone still uses it.
234
235 				   Destruction will be delayed until
236 				   the last user releases us, but
237 				   we must kill the timers etc. and move
238 				   the entry to a safe state.
239 				 */
240 				skb_queue_purge(&n->arp_queue);
241 				n->arp_queue_len_bytes = 0;
242 				n->output = neigh_blackhole;
243 				if (n->nud_state & NUD_VALID)
244 					n->nud_state = NUD_NOARP;
245 				else
246 					n->nud_state = NUD_NONE;
247 				NEIGH_PRINTK2("neigh %p is stray.\n", n);
248 			}
249 			write_unlock(&n->lock);
250 			neigh_cleanup_and_release(n);
251 		}
252 	}
253 }
254 
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257 	write_lock_bh(&tbl->lock);
258 	neigh_flush_dev(tbl, dev);
259 	write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262 
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265 	write_lock_bh(&tbl->lock);
266 	neigh_flush_dev(tbl, dev);
267 	pneigh_ifdown(tbl, dev);
268 	write_unlock_bh(&tbl->lock);
269 
270 	del_timer_sync(&tbl->proxy_timer);
271 	pneigh_queue_purge(&tbl->proxy_queue);
272 	return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275 
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
277 {
278 	struct neighbour *n = NULL;
279 	unsigned long now = jiffies;
280 	int entries;
281 
282 	entries = atomic_inc_return(&tbl->entries) - 1;
283 	if (entries >= tbl->gc_thresh3 ||
284 	    (entries >= tbl->gc_thresh2 &&
285 	     time_after(now, tbl->last_flush + 5 * HZ))) {
286 		if (!neigh_forced_gc(tbl) &&
287 		    entries >= tbl->gc_thresh3)
288 			goto out_entries;
289 	}
290 
291 	if (tbl->entry_size)
292 		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
293 	else {
294 		int sz = sizeof(*n) + tbl->key_len;
295 
296 		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
297 		sz += dev->neigh_priv_len;
298 		n = kzalloc(sz, GFP_ATOMIC);
299 	}
300 	if (!n)
301 		goto out_entries;
302 
303 	skb_queue_head_init(&n->arp_queue);
304 	rwlock_init(&n->lock);
305 	seqlock_init(&n->ha_lock);
306 	n->updated	  = n->used = now;
307 	n->nud_state	  = NUD_NONE;
308 	n->output	  = neigh_blackhole;
309 	seqlock_init(&n->hh.hh_lock);
310 	n->parms	  = neigh_parms_clone(&tbl->parms);
311 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
312 
313 	NEIGH_CACHE_STAT_INC(tbl, allocs);
314 	n->tbl		  = tbl;
315 	atomic_set(&n->refcnt, 1);
316 	n->dead		  = 1;
317 out:
318 	return n;
319 
320 out_entries:
321 	atomic_dec(&tbl->entries);
322 	goto out;
323 }
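
/* Editorial note: with the usual ARP defaults (gc_thresh2 = 512,
 * gc_thresh3 = 1024; other protocols or sysctl settings may differ), the
 * logic above means: past 1024 entries an allocation fails unless forced
 * GC frees something, and past 512 entries forced GC runs at most once
 * every 5 seconds.
 */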
324 
325 static void neigh_get_hash_rnd(u32 *x)
326 {
327 	get_random_bytes(x, sizeof(*x));
328 	*x |= 1;
329 }
330 
331 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
332 {
333 	size_t size = (1 << shift) * sizeof(struct neighbour *);
334 	struct neigh_hash_table *ret;
335 	struct neighbour __rcu **buckets;
336 	int i;
337 
338 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
339 	if (!ret)
340 		return NULL;
341 	if (size <= PAGE_SIZE)
342 		buckets = kzalloc(size, GFP_ATOMIC);
343 	else
344 		buckets = (struct neighbour __rcu **)
345 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
346 					   get_order(size));
347 	if (!buckets) {
348 		kfree(ret);
349 		return NULL;
350 	}
351 	ret->hash_buckets = buckets;
352 	ret->hash_shift = shift;
353 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
354 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
355 	return ret;
356 }
357 
358 static void neigh_hash_free_rcu(struct rcu_head *head)
359 {
360 	struct neigh_hash_table *nht = container_of(head,
361 						    struct neigh_hash_table,
362 						    rcu);
363 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
364 	struct neighbour __rcu **buckets = nht->hash_buckets;
365 
366 	if (size <= PAGE_SIZE)
367 		kfree(buckets);
368 	else
369 		free_pages((unsigned long)buckets, get_order(size));
370 	kfree(nht);
371 }
372 
373 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
374 						unsigned long new_shift)
375 {
376 	unsigned int i, hash;
377 	struct neigh_hash_table *new_nht, *old_nht;
378 
379 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
380 
381 	old_nht = rcu_dereference_protected(tbl->nht,
382 					    lockdep_is_held(&tbl->lock));
383 	new_nht = neigh_hash_alloc(new_shift);
384 	if (!new_nht)
385 		return old_nht;
386 
387 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
388 		struct neighbour *n, *next;
389 
390 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
391 						   lockdep_is_held(&tbl->lock));
392 		     n != NULL;
393 		     n = next) {
394 			hash = tbl->hash(n->primary_key, n->dev,
395 					 new_nht->hash_rnd);
396 
397 			hash >>= (32 - new_nht->hash_shift);
398 			next = rcu_dereference_protected(n->next,
399 						lockdep_is_held(&tbl->lock));
400 
401 			rcu_assign_pointer(n->next,
402 					   rcu_dereference_protected(
403 						new_nht->hash_buckets[hash],
404 						lockdep_is_held(&tbl->lock)));
405 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
406 		}
407 	}
408 
409 	rcu_assign_pointer(tbl->nht, new_nht);
410 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
411 	return new_nht;
412 }
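
/* Editorial note: bucket selection keeps the top hash_shift bits of the
 * 32-bit hash value ("hash >> (32 - shift)"), so with hash_shift = 3 the
 * eight buckets are indexed by the hash's three most significant bits.
 */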
413 
414 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
415 			       struct net_device *dev)
416 {
417 	struct neighbour *n;
418 	int key_len = tbl->key_len;
419 	u32 hash_val;
420 	struct neigh_hash_table *nht;
421 
422 	NEIGH_CACHE_STAT_INC(tbl, lookups);
423 
424 	rcu_read_lock_bh();
425 	nht = rcu_dereference_bh(tbl->nht);
426 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
427 
428 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
429 	     n != NULL;
430 	     n = rcu_dereference_bh(n->next)) {
431 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
432 			if (!atomic_inc_not_zero(&n->refcnt))
433 				n = NULL;
434 			NEIGH_CACHE_STAT_INC(tbl, hits);
435 			break;
436 		}
437 	}
438 
439 	rcu_read_unlock_bh();
440 	return n;
441 }
442 EXPORT_SYMBOL(neigh_lookup);
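
/* Editorial usage sketch ("arp_tbl" is the IPv4 ARP table defined in
 * net/ipv4/arp.c): neigh_lookup() returns the entry with a reference
 * held, or NULL, so the caller must neigh_release() it when done.
 */
#if 0
static void example_lookup(struct net_device *dev, __be32 *ip)
{
	struct neighbour *n = neigh_lookup(&arp_tbl, ip, dev);

	if (n) {
		/* ... inspect n->nud_state, n->ha, ... */
		neigh_release(n);
	}
}
#endif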
443 
444 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
445 				     const void *pkey)
446 {
447 	struct neighbour *n;
448 	int key_len = tbl->key_len;
449 	u32 hash_val;
450 	struct neigh_hash_table *nht;
451 
452 	NEIGH_CACHE_STAT_INC(tbl, lookups);
453 
454 	rcu_read_lock_bh();
455 	nht = rcu_dereference_bh(tbl->nht);
456 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
457 
458 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
459 	     n != NULL;
460 	     n = rcu_dereference_bh(n->next)) {
461 		if (!memcmp(n->primary_key, pkey, key_len) &&
462 		    net_eq(dev_net(n->dev), net)) {
463 			if (!atomic_inc_not_zero(&n->refcnt))
464 				n = NULL;
465 			NEIGH_CACHE_STAT_INC(tbl, hits);
466 			break;
467 		}
468 	}
469 
470 	rcu_read_unlock_bh();
471 	return n;
472 }
473 EXPORT_SYMBOL(neigh_lookup_nodev);
474 
475 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
476 			       struct net_device *dev)
477 {
478 	u32 hash_val;
479 	int key_len = tbl->key_len;
480 	int error;
481 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
482 	struct neigh_hash_table *nht;
483 
484 	if (!n) {
485 		rc = ERR_PTR(-ENOBUFS);
486 		goto out;
487 	}
488 
489 	memcpy(n->primary_key, pkey, key_len);
490 	n->dev = dev;
491 	dev_hold(dev);
492 
493 	/* Protocol specific setup. */
494 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
495 		rc = ERR_PTR(error);
496 		goto out_neigh_release;
497 	}
498 
499 	if (dev->netdev_ops->ndo_neigh_construct) {
500 		error = dev->netdev_ops->ndo_neigh_construct(n);
501 		if (error < 0) {
502 			rc = ERR_PTR(error);
503 			goto out_neigh_release;
504 		}
505 	}
506 
507 	/* Device specific setup. */
508 	if (n->parms->neigh_setup &&
509 	    (error = n->parms->neigh_setup(n)) < 0) {
510 		rc = ERR_PTR(error);
511 		goto out_neigh_release;
512 	}
513 
514 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
515 
516 	write_lock_bh(&tbl->lock);
517 	nht = rcu_dereference_protected(tbl->nht,
518 					lockdep_is_held(&tbl->lock));
519 
520 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
521 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
522 
523 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
524 
525 	if (n->parms->dead) {
526 		rc = ERR_PTR(-EINVAL);
527 		goto out_tbl_unlock;
528 	}
529 
530 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
531 					    lockdep_is_held(&tbl->lock));
532 	     n1 != NULL;
533 	     n1 = rcu_dereference_protected(n1->next,
534 			lockdep_is_held(&tbl->lock))) {
535 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
536 			neigh_hold(n1);
537 			rc = n1;
538 			goto out_tbl_unlock;
539 		}
540 	}
541 
542 	n->dead = 0;
543 	neigh_hold(n);
544 	rcu_assign_pointer(n->next,
545 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
546 						     lockdep_is_held(&tbl->lock)));
547 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
548 	write_unlock_bh(&tbl->lock);
549 	NEIGH_PRINTK2("neigh %p is created.\n", n);
550 	rc = n;
551 out:
552 	return rc;
553 out_tbl_unlock:
554 	write_unlock_bh(&tbl->lock);
555 out_neigh_release:
556 	neigh_release(n);
557 	goto out;
558 }
559 EXPORT_SYMBOL(neigh_create);
560 
561 static u32 pneigh_hash(const void *pkey, int key_len)
562 {
563 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
564 	hash_val ^= (hash_val >> 16);
565 	hash_val ^= hash_val >> 8;
566 	hash_val ^= hash_val >> 4;
567 	hash_val &= PNEIGH_HASHMASK;
568 	return hash_val;
569 }
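
/* Editorial note: only the last four bytes of the key are folded into the
 * hash above, so for IPv4 (key_len = 4) the whole address participates.
 */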
570 
571 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
572 					      struct net *net,
573 					      const void *pkey,
574 					      int key_len,
575 					      struct net_device *dev)
576 {
577 	while (n) {
578 		if (!memcmp(n->key, pkey, key_len) &&
579 		    net_eq(pneigh_net(n), net) &&
580 		    (n->dev == dev || !n->dev))
581 			return n;
582 		n = n->next;
583 	}
584 	return NULL;
585 }
586 
587 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
588 		struct net *net, const void *pkey, struct net_device *dev)
589 {
590 	int key_len = tbl->key_len;
591 	u32 hash_val = pneigh_hash(pkey, key_len);
592 
593 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
594 				 net, pkey, key_len, dev);
595 }
596 EXPORT_SYMBOL_GPL(__pneigh_lookup);
597 
598 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
599 				    struct net *net, const void *pkey,
600 				    struct net_device *dev, int creat)
601 {
602 	struct pneigh_entry *n;
603 	int key_len = tbl->key_len;
604 	u32 hash_val = pneigh_hash(pkey, key_len);
605 
606 	read_lock_bh(&tbl->lock);
607 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
608 			      net, pkey, key_len, dev);
609 	read_unlock_bh(&tbl->lock);
610 
611 	if (n || !creat)
612 		goto out;
613 
614 	ASSERT_RTNL();
615 
616 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
617 	if (!n)
618 		goto out;
619 
620 	write_pnet(&n->net, hold_net(net));
621 	memcpy(n->key, pkey, key_len);
622 	n->dev = dev;
623 	if (dev)
624 		dev_hold(dev);
625 
626 	if (tbl->pconstructor && tbl->pconstructor(n)) {
627 		if (dev)
628 			dev_put(dev);
629 		release_net(net);
630 		kfree(n);
631 		n = NULL;
632 		goto out;
633 	}
634 
635 	write_lock_bh(&tbl->lock);
636 	n->next = tbl->phash_buckets[hash_val];
637 	tbl->phash_buckets[hash_val] = n;
638 	write_unlock_bh(&tbl->lock);
639 out:
640 	return n;
641 }
642 EXPORT_SYMBOL(pneigh_lookup);
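
/* Editorial sketch of how the NTF_PROXY netlink path (neigh_add(), below)
 * uses this: creat = 1 creates the proxy entry under RTNL if it is
 * missing.
 */
#if 0
static int example_add_proxy(struct neigh_table *tbl, struct net *net,
			     const void *dst, struct net_device *dev)
{
	struct pneigh_entry *pn;

	ASSERT_RTNL();				/* required for creat = 1 */
	pn = pneigh_lookup(tbl, net, dst, dev, 1);
	return pn ? 0 : -ENOBUFS;
}
#endif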
643 
644 
645 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
646 		  struct net_device *dev)
647 {
648 	struct pneigh_entry *n, **np;
649 	int key_len = tbl->key_len;
650 	u32 hash_val = pneigh_hash(pkey, key_len);
651 
652 	write_lock_bh(&tbl->lock);
653 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
654 	     np = &n->next) {
655 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
656 		    net_eq(pneigh_net(n), net)) {
657 			*np = n->next;
658 			write_unlock_bh(&tbl->lock);
659 			if (tbl->pdestructor)
660 				tbl->pdestructor(n);
661 			if (n->dev)
662 				dev_put(n->dev);
663 			release_net(pneigh_net(n));
664 			kfree(n);
665 			return 0;
666 		}
667 	}
668 	write_unlock_bh(&tbl->lock);
669 	return -ENOENT;
670 }
671 
672 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
673 {
674 	struct pneigh_entry *n, **np;
675 	u32 h;
676 
677 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
678 		np = &tbl->phash_buckets[h];
679 		while ((n = *np) != NULL) {
680 			if (!dev || n->dev == dev) {
681 				*np = n->next;
682 				if (tbl->pdestructor)
683 					tbl->pdestructor(n);
684 				if (n->dev)
685 					dev_put(n->dev);
686 				release_net(pneigh_net(n));
687 				kfree(n);
688 				continue;
689 			}
690 			np = &n->next;
691 		}
692 	}
693 	return -ENOENT;
694 }
695 
696 static void neigh_parms_destroy(struct neigh_parms *parms);
697 
698 static inline void neigh_parms_put(struct neigh_parms *parms)
699 {
700 	if (atomic_dec_and_test(&parms->refcnt))
701 		neigh_parms_destroy(parms);
702 }
703 
704 /*
705  *	The neighbour entry must already have been removed
706  *	from the table.
707  */
708 void neigh_destroy(struct neighbour *neigh)
709 {
710 	struct net_device *dev = neigh->dev;
711 
712 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
713 
714 	if (!neigh->dead) {
715 		printk(KERN_WARNING
716 		       "Destroying alive neighbour %p\n", neigh);
717 		dump_stack();
718 		return;
719 	}
720 
721 	if (neigh_del_timer(neigh))
722 		printk(KERN_WARNING "Impossible event.\n");
723 
724 	skb_queue_purge(&neigh->arp_queue);
725 	neigh->arp_queue_len_bytes = 0;
726 
727 	if (dev->netdev_ops->ndo_neigh_destroy)
728 		dev->netdev_ops->ndo_neigh_destroy(neigh);
729 
730 	dev_put(dev);
731 	neigh_parms_put(neigh->parms);
732 
733 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
734 
735 	atomic_dec(&neigh->tbl->entries);
736 	kfree_rcu(neigh, rcu);
737 }
738 EXPORT_SYMBOL(neigh_destroy);
739 
740 /* Neighbour state is suspicious;
741    disable fast path.
742 
743    Called with the neighbour entry write-locked.
744  */
745 static void neigh_suspect(struct neighbour *neigh)
746 {
747 	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
748 
749 	neigh->output = neigh->ops->output;
750 }
751 
752 /* Neighbour state is OK;
753    enable fast path.
754 
755    Called with the neighbour entry write-locked.
756  */
757 static void neigh_connect(struct neighbour *neigh)
758 {
759 	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
760 
761 	neigh->output = neigh->ops->connected_output;
762 }
763 
764 static void neigh_periodic_work(struct work_struct *work)
765 {
766 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
767 	struct neighbour *n;
768 	struct neighbour __rcu **np;
769 	unsigned int i;
770 	struct neigh_hash_table *nht;
771 
772 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
773 
774 	write_lock_bh(&tbl->lock);
775 	nht = rcu_dereference_protected(tbl->nht,
776 					lockdep_is_held(&tbl->lock));
777 
778 	/*
779 	 *	periodically recompute ReachableTime from the random function
780 	 */
781 
782 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
783 		struct neigh_parms *p;
784 		tbl->last_rand = jiffies;
785 		for (p = &tbl->parms; p; p = p->next)
786 			p->reachable_time =
787 				neigh_rand_reach_time(p->base_reachable_time);
788 	}
789 
790 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
791 		np = &nht->hash_buckets[i];
792 
793 		while ((n = rcu_dereference_protected(*np,
794 				lockdep_is_held(&tbl->lock))) != NULL) {
795 			unsigned int state;
796 
797 			write_lock(&n->lock);
798 
799 			state = n->nud_state;
800 			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
801 				write_unlock(&n->lock);
802 				goto next_elt;
803 			}
804 
805 			if (time_before(n->used, n->confirmed))
806 				n->used = n->confirmed;
807 
808 			if (atomic_read(&n->refcnt) == 1 &&
809 			    (state == NUD_FAILED ||
810 			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
811 				*np = n->next;
812 				n->dead = 1;
813 				write_unlock(&n->lock);
814 				neigh_cleanup_and_release(n);
815 				continue;
816 			}
817 			write_unlock(&n->lock);
818 
819 next_elt:
820 			np = &n->next;
821 		}
822 		/*
823 		 * It's fine to release lock here, even if hash table
824 		 * grows while we are preempted.
825 		 */
826 		write_unlock_bh(&tbl->lock);
827 		cond_resched();
828 		write_lock_bh(&tbl->lock);
829 		nht = rcu_dereference_protected(tbl->nht,
830 						lockdep_is_held(&tbl->lock));
831 	}
832 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
833 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
834 	 * base_reachable_time.
835 	 */
836 	schedule_delayed_work(&tbl->gc_work,
837 			      tbl->parms.base_reachable_time >> 1);
838 	write_unlock_bh(&tbl->lock);
839 }
840 
841 static __inline__ int neigh_max_probes(struct neighbour *n)
842 {
843 	struct neigh_parms *p = n->parms;
844 	return (n->nud_state & NUD_PROBE) ?
845 		p->ucast_probes :
846 		p->ucast_probes + p->app_probes + p->mcast_probes;
847 }
848 
849 static void neigh_invalidate(struct neighbour *neigh)
850 	__releases(neigh->lock)
851 	__acquires(neigh->lock)
852 {
853 	struct sk_buff *skb;
854 
855 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
856 	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
857 	neigh->updated = jiffies;
858 
859 	/* This is a very delicate place. The error_report callback can be a
860 	   very complicated routine; in particular, it can hit this same
861 	   neighbour entry again!
862 	   So we try to be careful and avoid an endless loop. --ANK
863 	 */
864 	while (neigh->nud_state == NUD_FAILED &&
865 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
866 		write_unlock(&neigh->lock);
867 		neigh->ops->error_report(neigh, skb);
868 		write_lock(&neigh->lock);
869 	}
870 	skb_queue_purge(&neigh->arp_queue);
871 	neigh->arp_queue_len_bytes = 0;
872 }
873 
874 static void neigh_probe(struct neighbour *neigh)
875 	__releases(neigh->lock)
876 {
877 	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
878 	/* keep skb alive even if arp_queue overflows */
879 	if (skb)
880 		skb = skb_copy(skb, GFP_ATOMIC);
881 	write_unlock(&neigh->lock);
882 	neigh->ops->solicit(neigh, skb);
883 	atomic_inc(&neigh->probes);
884 	kfree_skb(skb);
885 }
886 
887 /* Called when a timer expires for a neighbour entry. */
888 
889 static void neigh_timer_handler(unsigned long arg)
890 {
891 	unsigned long now, next;
892 	struct neighbour *neigh = (struct neighbour *)arg;
893 	unsigned state;
894 	int notify = 0;
895 
896 	write_lock(&neigh->lock);
897 
898 	state = neigh->nud_state;
899 	now = jiffies;
900 	next = now + HZ;
901 
902 	if (!(state & NUD_IN_TIMER))
903 		goto out;
904 
905 	if (state & NUD_REACHABLE) {
906 		if (time_before_eq(now,
907 				   neigh->confirmed + neigh->parms->reachable_time)) {
908 			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
909 			next = neigh->confirmed + neigh->parms->reachable_time;
910 		} else if (time_before_eq(now,
911 					  neigh->used + neigh->parms->delay_probe_time)) {
912 			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
913 			neigh->nud_state = NUD_DELAY;
914 			neigh->updated = jiffies;
915 			neigh_suspect(neigh);
916 			next = now + neigh->parms->delay_probe_time;
917 		} else {
918 			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
919 			neigh->nud_state = NUD_STALE;
920 			neigh->updated = jiffies;
921 			neigh_suspect(neigh);
922 			notify = 1;
923 		}
924 	} else if (state & NUD_DELAY) {
925 		if (time_before_eq(now,
926 				   neigh->confirmed + neigh->parms->delay_probe_time)) {
927 			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
928 			neigh->nud_state = NUD_REACHABLE;
929 			neigh->updated = jiffies;
930 			neigh_connect(neigh);
931 			notify = 1;
932 			next = neigh->confirmed + neigh->parms->reachable_time;
933 		} else {
934 			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
935 			neigh->nud_state = NUD_PROBE;
936 			neigh->updated = jiffies;
937 			atomic_set(&neigh->probes, 0);
938 			next = now + neigh->parms->retrans_time;
939 		}
940 	} else {
941 		/* NUD_PROBE|NUD_INCOMPLETE */
942 		next = now + neigh->parms->retrans_time;
943 	}
944 
945 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
946 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
947 		neigh->nud_state = NUD_FAILED;
948 		notify = 1;
949 		neigh_invalidate(neigh);
950 	}
951 
952 	if (neigh->nud_state & NUD_IN_TIMER) {
953 		if (time_before(next, jiffies + HZ/2))
954 			next = jiffies + HZ/2;
955 		if (!mod_timer(&neigh->timer, next))
956 			neigh_hold(neigh);
957 	}
958 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
959 		neigh_probe(neigh);
960 	} else {
961 out:
962 		write_unlock(&neigh->lock);
963 	}
964 
965 	if (notify)
966 		neigh_update_notify(neigh);
967 
968 	neigh_release(neigh);
969 }
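
/* Editorial summary of the timer-driven transitions implemented above:
 *
 *   REACHABLE --(reachable_time over, used recently)--> DELAY
 *   REACHABLE --(reachable_time over, idle)-----------> STALE
 *   DELAY     --(confirmed within delay_probe_time)---> REACHABLE
 *   DELAY     --(delay_probe_time over)---------------> PROBE
 *   INCOMPLETE/PROBE --(neigh_max_probes() used up)---> FAILED
 */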
970 
971 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
972 {
973 	int rc;
974 	bool immediate_probe = false;
975 
976 	write_lock_bh(&neigh->lock);
977 
978 	rc = 0;
979 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
980 		goto out_unlock_bh;
981 
982 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
983 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
984 			unsigned long next, now = jiffies;
985 
986 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
987 			neigh->nud_state     = NUD_INCOMPLETE;
988 			neigh->updated = now;
989 			next = now + max(neigh->parms->retrans_time, HZ/2);
990 			neigh_add_timer(neigh, next);
991 			immediate_probe = true;
992 		} else {
993 			neigh->nud_state = NUD_FAILED;
994 			neigh->updated = jiffies;
995 			write_unlock_bh(&neigh->lock);
996 
997 			kfree_skb(skb);
998 			return 1;
999 		}
1000 	} else if (neigh->nud_state & NUD_STALE) {
1001 		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1002 		neigh->nud_state = NUD_DELAY;
1003 		neigh->updated = jiffies;
1004 		neigh_add_timer(neigh,
1005 				jiffies + neigh->parms->delay_probe_time);
1006 	}
1007 
1008 	if (neigh->nud_state == NUD_INCOMPLETE) {
1009 		if (skb) {
1010 			while (neigh->arp_queue_len_bytes + skb->truesize >
1011 			       neigh->parms->queue_len_bytes) {
1012 				struct sk_buff *buff;
1013 
1014 				buff = __skb_dequeue(&neigh->arp_queue);
1015 				if (!buff)
1016 					break;
1017 				neigh->arp_queue_len_bytes -= buff->truesize;
1018 				kfree_skb(buff);
1019 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1020 			}
1021 			skb_dst_force(skb);
1022 			__skb_queue_tail(&neigh->arp_queue, skb);
1023 			neigh->arp_queue_len_bytes += skb->truesize;
1024 		}
1025 		rc = 1;
1026 	}
1027 out_unlock_bh:
1028 	if (immediate_probe)
1029 		neigh_probe(neigh);
1030 	else
1031 		write_unlock(&neigh->lock);
1032 	local_bh_enable();
1033 	return rc;
1034 }
1035 EXPORT_SYMBOL(__neigh_event_send);
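
/* Editorial note: callers normally go through the neigh_event_send()
 * inline wrapper (declared in <net/neighbour.h>), which updates
 * neigh->used and short-circuits the common
 * NUD_CONNECTED/NUD_DELAY/NUD_PROBE case before taking the lock; see
 * neigh_resolve_output() below for the typical call site.
 */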
1036 
1037 static void neigh_update_hhs(struct neighbour *neigh)
1038 {
1039 	struct hh_cache *hh;
1040 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1041 		= NULL;
1042 
1043 	if (neigh->dev->header_ops)
1044 		update = neigh->dev->header_ops->cache_update;
1045 
1046 	if (update) {
1047 		hh = &neigh->hh;
1048 		if (hh->hh_len) {
1049 			write_seqlock_bh(&hh->hh_lock);
1050 			update(hh, neigh->dev, neigh->ha);
1051 			write_sequnlock_bh(&hh->hh_lock);
1052 		}
1053 	}
1054 }
1055 
1056 
1057 
1058 /* Generic update routine.
1059    -- lladdr is new lladdr or NULL, if it is not supplied.
1060    -- new    is new state.
1061    -- flags
1062 	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1063 				if it is different.
1064 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1065 				lladdr instead of overriding it
1066 				if it is different.
1067 				It also allows to retain current state
1068 				if lladdr is unchanged.
1069 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1070 
1071 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1072 				NTF_ROUTER flag.
1073 	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
1074 				a router.
1075 
1076    Caller MUST hold reference count on the entry.
1077  */
1078 
1079 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1080 		 u32 flags)
1081 {
1082 	u8 old;
1083 	int err;
1084 	int notify = 0;
1085 	struct net_device *dev;
1086 	int update_isrouter = 0;
1087 
1088 	write_lock_bh(&neigh->lock);
1089 
1090 	dev    = neigh->dev;
1091 	old    = neigh->nud_state;
1092 	err    = -EPERM;
1093 
1094 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1095 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1096 		goto out;
1097 
1098 	if (!(new & NUD_VALID)) {
1099 		neigh_del_timer(neigh);
1100 		if (old & NUD_CONNECTED)
1101 			neigh_suspect(neigh);
1102 		neigh->nud_state = new;
1103 		err = 0;
1104 		notify = old & NUD_VALID;
1105 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1106 		    (new & NUD_FAILED)) {
1107 			neigh_invalidate(neigh);
1108 			notify = 1;
1109 		}
1110 		goto out;
1111 	}
1112 
1113 	/* Compare new lladdr with cached one */
1114 	if (!dev->addr_len) {
1115 		/* The first case: the device needs no address. */
1116 		lladdr = neigh->ha;
1117 	} else if (lladdr) {
1118 		/* The second case: something is already cached
1119 		   and a new address is proposed:
1120 		   - compare the new and old addresses;
1121 		   - if they differ, check the override flag.
1122 		 */
1123 		if ((old & NUD_VALID) &&
1124 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1125 			lladdr = neigh->ha;
1126 	} else {
1127 		/* No address is supplied; if we know something,
1128 		   use it, otherwise discard the request.
1129 		 */
1130 		err = -EINVAL;
1131 		if (!(old & NUD_VALID))
1132 			goto out;
1133 		lladdr = neigh->ha;
1134 	}
1135 
1136 	if (new & NUD_CONNECTED)
1137 		neigh->confirmed = jiffies;
1138 	neigh->updated = jiffies;
1139 
1140 	/* If the entry was valid and the address has not changed,
1141 	   do not change the entry state if the new one is STALE.
1142 	 */
1143 	err = 0;
1144 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1145 	if (old & NUD_VALID) {
1146 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1147 			update_isrouter = 0;
1148 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1149 			    (old & NUD_CONNECTED)) {
1150 				lladdr = neigh->ha;
1151 				new = NUD_STALE;
1152 			} else
1153 				goto out;
1154 		} else {
1155 			if (lladdr == neigh->ha && new == NUD_STALE &&
1156 			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1157 			     (old & NUD_CONNECTED))
1158 			    )
1159 				new = old;
1160 		}
1161 	}
1162 
1163 	if (new != old) {
1164 		neigh_del_timer(neigh);
1165 		if (new & NUD_IN_TIMER)
1166 			neigh_add_timer(neigh, (jiffies +
1167 						((new & NUD_REACHABLE) ?
1168 						 neigh->parms->reachable_time :
1169 						 0)));
1170 		neigh->nud_state = new;
1171 	}
1172 
1173 	if (lladdr != neigh->ha) {
1174 		write_seqlock(&neigh->ha_lock);
1175 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1176 		write_sequnlock(&neigh->ha_lock);
1177 		neigh_update_hhs(neigh);
1178 		if (!(new & NUD_CONNECTED))
1179 			neigh->confirmed = jiffies -
1180 				      (neigh->parms->base_reachable_time << 1);
1181 		notify = 1;
1182 	}
1183 	if (new == old)
1184 		goto out;
1185 	if (new & NUD_CONNECTED)
1186 		neigh_connect(neigh);
1187 	else
1188 		neigh_suspect(neigh);
1189 	if (!(old & NUD_VALID)) {
1190 		struct sk_buff *skb;
1191 
1192 		/* Again: avoid an endless loop if something went wrong */
1193 
1194 		while (neigh->nud_state & NUD_VALID &&
1195 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1196 			struct dst_entry *dst = skb_dst(skb);
1197 			struct neighbour *n2, *n1 = neigh;
1198 			write_unlock_bh(&neigh->lock);
1199 
1200 			rcu_read_lock();
1201 			/* On shaper/eql skb->dst->neighbour != neigh :( */
1202 			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
1203 				n1 = n2;
1204 			n1->output(n1, skb);
1205 			rcu_read_unlock();
1206 
1207 			write_lock_bh(&neigh->lock);
1208 		}
1209 		skb_queue_purge(&neigh->arp_queue);
1210 		neigh->arp_queue_len_bytes = 0;
1211 	}
1212 out:
1213 	if (update_isrouter) {
1214 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1215 			(neigh->flags | NTF_ROUTER) :
1216 			(neigh->flags & ~NTF_ROUTER);
1217 	}
1218 	write_unlock_bh(&neigh->lock);
1219 
1220 	if (notify)
1221 		neigh_update_notify(neigh);
1222 
1223 	return err;
1224 }
1225 EXPORT_SYMBOL(neigh_update);
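
/* Editorial usage sketch: confirm a neighbour with a freshly learned
 * link-layer address, overriding a differing cached one.  (Compare
 * neigh_event_ns() below, which uses NUD_STALE.)
 */
#if 0
static int example_confirm(struct neighbour *neigh, const u8 *lladdr)
{
	/* the caller must hold a reference on neigh, per the comment above */
	return neigh_update(neigh, lladdr, NUD_REACHABLE,
			    NEIGH_UPDATE_F_OVERRIDE);
}
#endif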
1226 
1227 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1228 				 u8 *lladdr, void *saddr,
1229 				 struct net_device *dev)
1230 {
1231 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1232 						 lladdr || !dev->addr_len);
1233 	if (neigh)
1234 		neigh_update(neigh, lladdr, NUD_STALE,
1235 			     NEIGH_UPDATE_F_OVERRIDE);
1236 	return neigh;
1237 }
1238 EXPORT_SYMBOL(neigh_event_ns);
1239 
1240 /* called with read_lock_bh(&n->lock); */
1241 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1242 {
1243 	struct net_device *dev = dst->dev;
1244 	__be16 prot = dst->ops->protocol;
1245 	struct hh_cache	*hh = &n->hh;
1246 
1247 	write_lock_bh(&n->lock);
1248 
1249 	/* Only one thread can come in here and initialize the
1250 	 * hh_cache entry.
1251 	 */
1252 	if (!hh->hh_len)
1253 		dev->header_ops->cache(n, hh, prot);
1254 
1255 	write_unlock_bh(&n->lock);
1256 }
1257 
1258 /* This function can be used in contexts where only the old dev_queue_xmit
1259  * worked, e.g. if you want to override the normal output path (eql,
1260  * shaper) but resolution has not been done yet.
1261  */
1262 
1263 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1264 {
1265 	struct net_device *dev = skb->dev;
1266 
1267 	__skb_pull(skb, skb_network_offset(skb));
1268 
1269 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1270 			    skb->len) < 0 &&
1271 	    dev->header_ops->rebuild(skb))
1272 		return 0;
1273 
1274 	return dev_queue_xmit(skb);
1275 }
1276 EXPORT_SYMBOL(neigh_compat_output);
1277 
1278 /* Slow and careful. */
1279 
1280 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1281 {
1282 	struct dst_entry *dst = skb_dst(skb);
1283 	int rc = 0;
1284 
1285 	if (!dst)
1286 		goto discard;
1287 
1288 	__skb_pull(skb, skb_network_offset(skb));
1289 
1290 	if (!neigh_event_send(neigh, skb)) {
1291 		int err;
1292 		struct net_device *dev = neigh->dev;
1293 		unsigned int seq;
1294 
1295 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1296 			neigh_hh_init(neigh, dst);
1297 
1298 		do {
1299 			seq = read_seqbegin(&neigh->ha_lock);
1300 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1301 					      neigh->ha, NULL, skb->len);
1302 		} while (read_seqretry(&neigh->ha_lock, seq));
1303 
1304 		if (err >= 0)
1305 			rc = dev_queue_xmit(skb);
1306 		else
1307 			goto out_kfree_skb;
1308 	}
1309 out:
1310 	return rc;
1311 discard:
1312 	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1313 		      dst, neigh);
1314 out_kfree_skb:
1315 	rc = -EINVAL;
1316 	kfree_skb(skb);
1317 	goto out;
1318 }
1319 EXPORT_SYMBOL(neigh_resolve_output);
1320 
1321 /* As fast as possible without hh cache */
1322 
1323 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1324 {
1325 	struct net_device *dev = neigh->dev;
1326 	unsigned int seq;
1327 	int err;
1328 
1329 	__skb_pull(skb, skb_network_offset(skb));
1330 
1331 	do {
1332 		seq = read_seqbegin(&neigh->ha_lock);
1333 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1334 				      neigh->ha, NULL, skb->len);
1335 	} while (read_seqretry(&neigh->ha_lock, seq));
1336 
1337 	if (err >= 0)
1338 		err = dev_queue_xmit(skb);
1339 	else {
1340 		err = -EINVAL;
1341 		kfree_skb(skb);
1342 	}
1343 	return err;
1344 }
1345 EXPORT_SYMBOL(neigh_connected_output);
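
/* Editorial note on the read_seqbegin()/read_seqretry() loops above:
 * neigh_update() rewrites neigh->ha under write_seqlock(&neigh->ha_lock),
 * so the output paths copy the address inside a seqlock read loop to get
 * a consistent snapshot without taking neigh->lock.
 */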
1346 
1347 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1348 {
1349 	return dev_queue_xmit(skb);
1350 }
1351 EXPORT_SYMBOL(neigh_direct_output);
1352 
1353 static void neigh_proxy_process(unsigned long arg)
1354 {
1355 	struct neigh_table *tbl = (struct neigh_table *)arg;
1356 	long sched_next = 0;
1357 	unsigned long now = jiffies;
1358 	struct sk_buff *skb, *n;
1359 
1360 	spin_lock(&tbl->proxy_queue.lock);
1361 
1362 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1363 		long tdif = NEIGH_CB(skb)->sched_next - now;
1364 
1365 		if (tdif <= 0) {
1366 			struct net_device *dev = skb->dev;
1367 
1368 			__skb_unlink(skb, &tbl->proxy_queue);
1369 			if (tbl->proxy_redo && netif_running(dev)) {
1370 				rcu_read_lock();
1371 				tbl->proxy_redo(skb);
1372 				rcu_read_unlock();
1373 			} else {
1374 				kfree_skb(skb);
1375 			}
1376 
1377 			dev_put(dev);
1378 		} else if (!sched_next || tdif < sched_next)
1379 			sched_next = tdif;
1380 	}
1381 	del_timer(&tbl->proxy_timer);
1382 	if (sched_next)
1383 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1384 	spin_unlock(&tbl->proxy_queue.lock);
1385 }
1386 
1387 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1388 		    struct sk_buff *skb)
1389 {
1390 	unsigned long now = jiffies;
1391 	unsigned long sched_next = now + (net_random() % p->proxy_delay);
1392 
1393 	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1394 		kfree_skb(skb);
1395 		return;
1396 	}
1397 
1398 	NEIGH_CB(skb)->sched_next = sched_next;
1399 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1400 
1401 	spin_lock(&tbl->proxy_queue.lock);
1402 	if (del_timer(&tbl->proxy_timer)) {
1403 		if (time_before(tbl->proxy_timer.expires, sched_next))
1404 			sched_next = tbl->proxy_timer.expires;
1405 	}
1406 	skb_dst_drop(skb);
1407 	dev_hold(skb->dev);
1408 	__skb_queue_tail(&tbl->proxy_queue, skb);
1409 	mod_timer(&tbl->proxy_timer, sched_next);
1410 	spin_unlock(&tbl->proxy_queue.lock);
1411 }
1412 EXPORT_SYMBOL(pneigh_enqueue);
1413 
1414 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1415 						      struct net *net, int ifindex)
1416 {
1417 	struct neigh_parms *p;
1418 
1419 	for (p = &tbl->parms; p; p = p->next) {
1420 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1421 		    (!p->dev && !ifindex))
1422 			return p;
1423 	}
1424 
1425 	return NULL;
1426 }
1427 
1428 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1429 				      struct neigh_table *tbl)
1430 {
1431 	struct neigh_parms *p, *ref;
1432 	struct net *net = dev_net(dev);
1433 	const struct net_device_ops *ops = dev->netdev_ops;
1434 
1435 	ref = lookup_neigh_parms(tbl, net, 0);
1436 	if (!ref)
1437 		return NULL;
1438 
1439 	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1440 	if (p) {
1441 		p->tbl		  = tbl;
1442 		atomic_set(&p->refcnt, 1);
1443 		p->reachable_time =
1444 				neigh_rand_reach_time(p->base_reachable_time);
1445 
1446 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1447 			kfree(p);
1448 			return NULL;
1449 		}
1450 
1451 		dev_hold(dev);
1452 		p->dev = dev;
1453 		write_pnet(&p->net, hold_net(net));
1454 		p->sysctl_table = NULL;
1455 		write_lock_bh(&tbl->lock);
1456 		p->next		= tbl->parms.next;
1457 		tbl->parms.next = p;
1458 		write_unlock_bh(&tbl->lock);
1459 	}
1460 	return p;
1461 }
1462 EXPORT_SYMBOL(neigh_parms_alloc);
1463 
1464 static void neigh_rcu_free_parms(struct rcu_head *head)
1465 {
1466 	struct neigh_parms *parms =
1467 		container_of(head, struct neigh_parms, rcu_head);
1468 
1469 	neigh_parms_put(parms);
1470 }
1471 
1472 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1473 {
1474 	struct neigh_parms **p;
1475 
1476 	if (!parms || parms == &tbl->parms)
1477 		return;
1478 	write_lock_bh(&tbl->lock);
1479 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1480 		if (*p == parms) {
1481 			*p = parms->next;
1482 			parms->dead = 1;
1483 			write_unlock_bh(&tbl->lock);
1484 			if (parms->dev)
1485 				dev_put(parms->dev);
1486 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1487 			return;
1488 		}
1489 	}
1490 	write_unlock_bh(&tbl->lock);
1491 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
1492 }
1493 EXPORT_SYMBOL(neigh_parms_release);
1494 
1495 static void neigh_parms_destroy(struct neigh_parms *parms)
1496 {
1497 	release_net(neigh_parms_net(parms));
1498 	kfree(parms);
1499 }
1500 
1501 static struct lock_class_key neigh_table_proxy_queue_class;
1502 
1503 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1504 {
1505 	unsigned long now = jiffies;
1506 	unsigned long phsize;
1507 
1508 	write_pnet(&tbl->parms.net, &init_net);
1509 	atomic_set(&tbl->parms.refcnt, 1);
1510 	tbl->parms.reachable_time =
1511 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
1512 
1513 	tbl->stats = alloc_percpu(struct neigh_statistics);
1514 	if (!tbl->stats)
1515 		panic("cannot create neighbour cache statistics");
1516 
1517 #ifdef CONFIG_PROC_FS
1518 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1519 			      &neigh_stat_seq_fops, tbl))
1520 		panic("cannot create neighbour proc dir entry");
1521 #endif
1522 
1523 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1524 
1525 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1526 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1527 
1528 	if (!tbl->nht || !tbl->phash_buckets)
1529 		panic("cannot allocate neighbour cache hashes");
1530 
1531 	rwlock_init(&tbl->lock);
1532 	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1533 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1534 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1535 	skb_queue_head_init_class(&tbl->proxy_queue,
1536 			&neigh_table_proxy_queue_class);
1537 
1538 	tbl->last_flush = now;
1539 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1540 }
1541 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1542 
1543 void neigh_table_init(struct neigh_table *tbl)
1544 {
1545 	struct neigh_table *tmp;
1546 
1547 	neigh_table_init_no_netlink(tbl);
1548 	write_lock(&neigh_tbl_lock);
1549 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1550 		if (tmp->family == tbl->family)
1551 			break;
1552 	}
1553 	tbl->next	= neigh_tables;
1554 	neigh_tables	= tbl;
1555 	write_unlock(&neigh_tbl_lock);
1556 
1557 	if (unlikely(tmp)) {
1558 		printk(KERN_ERR "NEIGH: Registering multiple tables for "
1559 		       "family %d\n", tbl->family);
1560 		dump_stack();
1561 	}
1562 }
1563 EXPORT_SYMBOL(neigh_table_init);
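
/* Editorial registration sketch; the values and helper names are
 * hypothetical (real users such as the ARP code also fill in ops
 * structures, sysctl parameters, etc.).
 */
#if 0
static struct neigh_table example_tbl = {
	.family		= AF_INET,
	.id		= "example_cache",
	.key_len	= 4,			/* e.g. an IPv4 address */
	.hash		= example_hash,		/* hypothetical hash fn */
	.constructor	= example_constructor,	/* hypothetical ctor */
	.parms		= {
		.tbl			= &example_tbl,
		.base_reachable_time	= 30 * HZ,
		.retrans_time		= 1 * HZ,
		.gc_staletime		= 60 * HZ,
	},
	.gc_interval	= 30 * HZ,
	.gc_thresh1	= 128,
	.gc_thresh2	= 512,
	.gc_thresh3	= 1024,
};

/* ... then, from the protocol's init code: neigh_table_init(&example_tbl); */
#endif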
1564 
1565 int neigh_table_clear(struct neigh_table *tbl)
1566 {
1567 	struct neigh_table **tp;
1568 
1569 	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
1570 	cancel_delayed_work_sync(&tbl->gc_work);
1571 	del_timer_sync(&tbl->proxy_timer);
1572 	pneigh_queue_purge(&tbl->proxy_queue);
1573 	neigh_ifdown(tbl, NULL);
1574 	if (atomic_read(&tbl->entries))
1575 		printk(KERN_CRIT "neighbour leakage\n");
1576 	write_lock(&neigh_tbl_lock);
1577 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1578 		if (*tp == tbl) {
1579 			*tp = tbl->next;
1580 			break;
1581 		}
1582 	}
1583 	write_unlock(&neigh_tbl_lock);
1584 
1585 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1586 		 neigh_hash_free_rcu);
1587 	tbl->nht = NULL;
1588 
1589 	kfree(tbl->phash_buckets);
1590 	tbl->phash_buckets = NULL;
1591 
1592 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1593 
1594 	free_percpu(tbl->stats);
1595 	tbl->stats = NULL;
1596 
1597 	return 0;
1598 }
1599 EXPORT_SYMBOL(neigh_table_clear);
1600 
1601 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1602 {
1603 	struct net *net = sock_net(skb->sk);
1604 	struct ndmsg *ndm;
1605 	struct nlattr *dst_attr;
1606 	struct neigh_table *tbl;
1607 	struct net_device *dev = NULL;
1608 	int err = -EINVAL;
1609 
1610 	ASSERT_RTNL();
1611 	if (nlmsg_len(nlh) < sizeof(*ndm))
1612 		goto out;
1613 
1614 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1615 	if (dst_attr == NULL)
1616 		goto out;
1617 
1618 	ndm = nlmsg_data(nlh);
1619 	if (ndm->ndm_ifindex) {
1620 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1621 		if (dev == NULL) {
1622 			err = -ENODEV;
1623 			goto out;
1624 		}
1625 	}
1626 
1627 	read_lock(&neigh_tbl_lock);
1628 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1629 		struct neighbour *neigh;
1630 
1631 		if (tbl->family != ndm->ndm_family)
1632 			continue;
1633 		read_unlock(&neigh_tbl_lock);
1634 
1635 		if (nla_len(dst_attr) < tbl->key_len)
1636 			goto out;
1637 
1638 		if (ndm->ndm_flags & NTF_PROXY) {
1639 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1640 			goto out;
1641 		}
1642 
1643 		if (dev == NULL)
1644 			goto out;
1645 
1646 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1647 		if (neigh == NULL) {
1648 			err = -ENOENT;
1649 			goto out;
1650 		}
1651 
1652 		err = neigh_update(neigh, NULL, NUD_FAILED,
1653 				   NEIGH_UPDATE_F_OVERRIDE |
1654 				   NEIGH_UPDATE_F_ADMIN);
1655 		neigh_release(neigh);
1656 		goto out;
1657 	}
1658 	read_unlock(&neigh_tbl_lock);
1659 	err = -EAFNOSUPPORT;
1660 
1661 out:
1662 	return err;
1663 }
1664 
1665 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1666 {
1667 	struct net *net = sock_net(skb->sk);
1668 	struct ndmsg *ndm;
1669 	struct nlattr *tb[NDA_MAX+1];
1670 	struct neigh_table *tbl;
1671 	struct net_device *dev = NULL;
1672 	int err;
1673 
1674 	ASSERT_RTNL();
1675 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1676 	if (err < 0)
1677 		goto out;
1678 
1679 	err = -EINVAL;
1680 	if (tb[NDA_DST] == NULL)
1681 		goto out;
1682 
1683 	ndm = nlmsg_data(nlh);
1684 	if (ndm->ndm_ifindex) {
1685 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1686 		if (dev == NULL) {
1687 			err = -ENODEV;
1688 			goto out;
1689 		}
1690 
1691 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1692 			goto out;
1693 	}
1694 
1695 	read_lock(&neigh_tbl_lock);
1696 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1697 		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1698 		struct neighbour *neigh;
1699 		void *dst, *lladdr;
1700 
1701 		if (tbl->family != ndm->ndm_family)
1702 			continue;
1703 		read_unlock(&neigh_tbl_lock);
1704 
1705 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1706 			goto out;
1707 		dst = nla_data(tb[NDA_DST]);
1708 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1709 
1710 		if (ndm->ndm_flags & NTF_PROXY) {
1711 			struct pneigh_entry *pn;
1712 
1713 			err = -ENOBUFS;
1714 			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1715 			if (pn) {
1716 				pn->flags = ndm->ndm_flags;
1717 				err = 0;
1718 			}
1719 			goto out;
1720 		}
1721 
1722 		if (dev == NULL)
1723 			goto out;
1724 
1725 		neigh = neigh_lookup(tbl, dst, dev);
1726 		if (neigh == NULL) {
1727 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1728 				err = -ENOENT;
1729 				goto out;
1730 			}
1731 
1732 			neigh = __neigh_lookup_errno(tbl, dst, dev);
1733 			if (IS_ERR(neigh)) {
1734 				err = PTR_ERR(neigh);
1735 				goto out;
1736 			}
1737 		} else {
1738 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1739 				err = -EEXIST;
1740 				neigh_release(neigh);
1741 				goto out;
1742 			}
1743 
1744 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1745 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1746 		}
1747 
1748 		if (ndm->ndm_flags & NTF_USE) {
1749 			neigh_event_send(neigh, NULL);
1750 			err = 0;
1751 		} else
1752 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1753 		neigh_release(neigh);
1754 		goto out;
1755 	}
1756 
1757 	read_unlock(&neigh_tbl_lock);
1758 	err = -EAFNOSUPPORT;
1759 out:
1760 	return err;
1761 }
1762 
1763 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1764 {
1765 	struct nlattr *nest;
1766 
1767 	nest = nla_nest_start(skb, NDTA_PARMS);
1768 	if (nest == NULL)
1769 		return -ENOBUFS;
1770 
1771 	if (parms->dev)
1772 		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1773 
1774 	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1775 	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
1776 	/* approximate value for the deprecated QUEUE_LEN (in packets) */
1777 	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
1778 		    DIV_ROUND_UP(parms->queue_len_bytes,
1779 				 SKB_TRUESIZE(ETH_FRAME_LEN)));
1780 	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1781 	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1782 	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1783 	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1784 	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1785 	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1786 		      parms->base_reachable_time);
1787 	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1788 	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1789 	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1790 	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1791 	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1792 	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1793 
1794 	return nla_nest_end(skb, nest);
1795 
1796 nla_put_failure:
1797 	nla_nest_cancel(skb, nest);
1798 	return -EMSGSIZE;
1799 }
1800 
1801 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1802 			      u32 pid, u32 seq, int type, int flags)
1803 {
1804 	struct nlmsghdr *nlh;
1805 	struct ndtmsg *ndtmsg;
1806 
1807 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1808 	if (nlh == NULL)
1809 		return -EMSGSIZE;
1810 
1811 	ndtmsg = nlmsg_data(nlh);
1812 
1813 	read_lock_bh(&tbl->lock);
1814 	ndtmsg->ndtm_family = tbl->family;
1815 	ndtmsg->ndtm_pad1   = 0;
1816 	ndtmsg->ndtm_pad2   = 0;
1817 
1818 	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1819 	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1820 	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1821 	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1822 	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1823 
1824 	{
1825 		unsigned long now = jiffies;
1826 		unsigned int flush_delta = now - tbl->last_flush;
1827 		unsigned int rand_delta = now - tbl->last_rand;
1828 		struct neigh_hash_table *nht;
1829 		struct ndt_config ndc = {
1830 			.ndtc_key_len		= tbl->key_len,
1831 			.ndtc_entry_size	= tbl->entry_size,
1832 			.ndtc_entries		= atomic_read(&tbl->entries),
1833 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1834 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1835 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1836 		};
1837 
1838 		rcu_read_lock_bh();
1839 		nht = rcu_dereference_bh(tbl->nht);
1840 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1841 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1842 		rcu_read_unlock_bh();
1843 
1844 		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1845 	}
1846 
1847 	{
1848 		int cpu;
1849 		struct ndt_stats ndst;
1850 
1851 		memset(&ndst, 0, sizeof(ndst));
1852 
1853 		for_each_possible_cpu(cpu) {
1854 			struct neigh_statistics	*st;
1855 
1856 			st = per_cpu_ptr(tbl->stats, cpu);
1857 			ndst.ndts_allocs		+= st->allocs;
1858 			ndst.ndts_destroys		+= st->destroys;
1859 			ndst.ndts_hash_grows		+= st->hash_grows;
1860 			ndst.ndts_res_failed		+= st->res_failed;
1861 			ndst.ndts_lookups		+= st->lookups;
1862 			ndst.ndts_hits			+= st->hits;
1863 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1864 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1865 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1866 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1867 		}
1868 
1869 		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1870 	}
1871 
1872 	BUG_ON(tbl->parms.dev);
1873 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1874 		goto nla_put_failure;
1875 
1876 	read_unlock_bh(&tbl->lock);
1877 	return nlmsg_end(skb, nlh);
1878 
1879 nla_put_failure:
1880 	read_unlock_bh(&tbl->lock);
1881 	nlmsg_cancel(skb, nlh);
1882 	return -EMSGSIZE;
1883 }
1884 
1885 static int neightbl_fill_param_info(struct sk_buff *skb,
1886 				    struct neigh_table *tbl,
1887 				    struct neigh_parms *parms,
1888 				    u32 pid, u32 seq, int type,
1889 				    unsigned int flags)
1890 {
1891 	struct ndtmsg *ndtmsg;
1892 	struct nlmsghdr *nlh;
1893 
1894 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1895 	if (nlh == NULL)
1896 		return -EMSGSIZE;
1897 
1898 	ndtmsg = nlmsg_data(nlh);
1899 
1900 	read_lock_bh(&tbl->lock);
1901 	ndtmsg->ndtm_family = tbl->family;
1902 	ndtmsg->ndtm_pad1   = 0;
1903 	ndtmsg->ndtm_pad2   = 0;
1904 
1905 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1906 	    neightbl_fill_parms(skb, parms) < 0)
1907 		goto errout;
1908 
1909 	read_unlock_bh(&tbl->lock);
1910 	return nlmsg_end(skb, nlh);
1911 errout:
1912 	read_unlock_bh(&tbl->lock);
1913 	nlmsg_cancel(skb, nlh);
1914 	return -EMSGSIZE;
1915 }
1916 
1917 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1918 	[NDTA_NAME]		= { .type = NLA_STRING },
1919 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1920 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1921 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1922 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1923 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1924 };
1925 
1926 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1927 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1928 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1929 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1930 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1931 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1932 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1933 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1934 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1935 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1936 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1937 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1938 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1939 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1940 };
1941 
1942 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1943 {
1944 	struct net *net = sock_net(skb->sk);
1945 	struct neigh_table *tbl;
1946 	struct ndtmsg *ndtmsg;
1947 	struct nlattr *tb[NDTA_MAX+1];
1948 	int err;
1949 
1950 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1951 			  nl_neightbl_policy);
1952 	if (err < 0)
1953 		goto errout;
1954 
1955 	if (tb[NDTA_NAME] == NULL) {
1956 		err = -EINVAL;
1957 		goto errout;
1958 	}
1959 
1960 	ndtmsg = nlmsg_data(nlh);
1961 	read_lock(&neigh_tbl_lock);
1962 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1963 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1964 			continue;
1965 
1966 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1967 			break;
1968 	}
1969 
1970 	if (tbl == NULL) {
1971 		err = -ENOENT;
1972 		goto errout_locked;
1973 	}
1974 
1975 	/*
1976 	 * We acquire tbl->lock to be nice to the periodic timers and
1977 	 * make sure they always see a consistent set of values.
1978 	 */
1979 	write_lock_bh(&tbl->lock);
1980 
1981 	if (tb[NDTA_PARMS]) {
1982 		struct nlattr *tbp[NDTPA_MAX+1];
1983 		struct neigh_parms *p;
1984 		int i, ifindex = 0;
1985 
1986 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1987 				       nl_ntbl_parm_policy);
1988 		if (err < 0)
1989 			goto errout_tbl_lock;
1990 
1991 		if (tbp[NDTPA_IFINDEX])
1992 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1993 
1994 		p = lookup_neigh_parms(tbl, net, ifindex);
1995 		if (p == NULL) {
1996 			err = -ENOENT;
1997 			goto errout_tbl_lock;
1998 		}
1999 
2000 		for (i = 1; i <= NDTPA_MAX; i++) {
2001 			if (tbp[i] == NULL)
2002 				continue;
2003 
2004 			switch (i) {
2005 			case NDTPA_QUEUE_LEN:
2006 				p->queue_len_bytes = nla_get_u32(tbp[i]) *
2007 						     SKB_TRUESIZE(ETH_FRAME_LEN);
2008 				break;
2009 			case NDTPA_QUEUE_LENBYTES:
2010 				p->queue_len_bytes = nla_get_u32(tbp[i]);
2011 				break;
2012 			case NDTPA_PROXY_QLEN:
2013 				p->proxy_qlen = nla_get_u32(tbp[i]);
2014 				break;
2015 			case NDTPA_APP_PROBES:
2016 				p->app_probes = nla_get_u32(tbp[i]);
2017 				break;
2018 			case NDTPA_UCAST_PROBES:
2019 				p->ucast_probes = nla_get_u32(tbp[i]);
2020 				break;
2021 			case NDTPA_MCAST_PROBES:
2022 				p->mcast_probes = nla_get_u32(tbp[i]);
2023 				break;
2024 			case NDTPA_BASE_REACHABLE_TIME:
2025 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2026 				break;
2027 			case NDTPA_GC_STALETIME:
2028 				p->gc_staletime = nla_get_msecs(tbp[i]);
2029 				break;
2030 			case NDTPA_DELAY_PROBE_TIME:
2031 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2032 				break;
2033 			case NDTPA_RETRANS_TIME:
2034 				p->retrans_time = nla_get_msecs(tbp[i]);
2035 				break;
2036 			case NDTPA_ANYCAST_DELAY:
2037 				p->anycast_delay = nla_get_msecs(tbp[i]);
2038 				break;
2039 			case NDTPA_PROXY_DELAY:
2040 				p->proxy_delay = nla_get_msecs(tbp[i]);
2041 				break;
2042 			case NDTPA_LOCKTIME:
2043 				p->locktime = nla_get_msecs(tbp[i]);
2044 				break;
2045 			}
2046 		}
2047 	}
2048 
2049 	if (tb[NDTA_THRESH1])
2050 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2051 
2052 	if (tb[NDTA_THRESH2])
2053 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2054 
2055 	if (tb[NDTA_THRESH3])
2056 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2057 
2058 	if (tb[NDTA_GC_INTERVAL])
2059 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2060 
2061 	err = 0;
2062 
2063 errout_tbl_lock:
2064 	write_unlock_bh(&tbl->lock);
2065 errout_locked:
2066 	read_unlock(&neigh_tbl_lock);
2067 errout:
2068 	return err;
2069 }
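/*
 * neightbl_set() above is the RTM_SETNEIGHTBL handler: it looks up the
 * table by NDTA_NAME, optionally patches one neigh_parms instance
 * selected by NDTPA_IFINDEX inside NDTA_PARMS, and then applies the
 * global thresholds.  This is roughly what iproute2 drives with, for
 * example, "ip ntable change name arp_cache thresh1 256" (command shown
 * for illustration; the exact syntax belongs to iproute2, not this file).
 */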
2070 
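/*
 * Dump handlers may be re-invoked several times to complete one
 * user-visible dump; progress is parked in cb->args[] between calls.
 * For neightbl_dump_info() below, cb->args[0] is the index of the next
 * table to dump and cb->args[1] the index of the next per-device parms
 * entry within that table.
 */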
2071 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2072 {
2073 	struct net *net = sock_net(skb->sk);
2074 	int family, tidx, nidx = 0;
2075 	int tbl_skip = cb->args[0];
2076 	int neigh_skip = cb->args[1];
2077 	struct neigh_table *tbl;
2078 
2079 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2080 
2081 	read_lock(&neigh_tbl_lock);
2082 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2083 		struct neigh_parms *p;
2084 
2085 		if (tidx < tbl_skip || (family && tbl->family != family))
2086 			continue;
2087 
2088 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2089 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2090 				       NLM_F_MULTI) <= 0)
2091 			break;
2092 
2093 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2094 			if (!net_eq(neigh_parms_net(p), net))
2095 				continue;
2096 
2097 			if (nidx < neigh_skip)
2098 				goto next;
2099 
2100 			if (neightbl_fill_param_info(skb, tbl, p,
2101 						     NETLINK_CB(cb->skb).pid,
2102 						     cb->nlh->nlmsg_seq,
2103 						     RTM_NEWNEIGHTBL,
2104 						     NLM_F_MULTI) <= 0)
2105 				goto out;
2106 		next:
2107 			nidx++;
2108 		}
2109 
2110 		neigh_skip = 0;
2111 	}
2112 out:
2113 	read_unlock(&neigh_tbl_lock);
2114 	cb->args[0] = tidx;
2115 	cb->args[1] = nidx;
2116 
2117 	return skb->len;
2118 }
2119 
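/*
 * neigh_fill_info() below lays out one RTM_NEWNEIGH message: a struct
 * ndmsg header followed by NDA_DST (the protocol address used as the
 * hash key), NDA_LLADDR (only while the entry is NUD_VALID, snapshotted
 * under neigh->lock), NDA_CACHEINFO with ages converted to clock ticks,
 * and an NDA_PROBES counter.
 */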
2120 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2121 			   u32 pid, u32 seq, int type, unsigned int flags)
2122 {
2123 	unsigned long now = jiffies;
2124 	struct nda_cacheinfo ci;
2125 	struct nlmsghdr *nlh;
2126 	struct ndmsg *ndm;
2127 
2128 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2129 	if (nlh == NULL)
2130 		return -EMSGSIZE;
2131 
2132 	ndm = nlmsg_data(nlh);
2133 	ndm->ndm_family	 = neigh->ops->family;
2134 	ndm->ndm_pad1    = 0;
2135 	ndm->ndm_pad2    = 0;
2136 	ndm->ndm_flags	 = neigh->flags;
2137 	ndm->ndm_type	 = neigh->type;
2138 	ndm->ndm_ifindex = neigh->dev->ifindex;
2139 
2140 	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2141 
2142 	read_lock_bh(&neigh->lock);
2143 	ndm->ndm_state	 = neigh->nud_state;
2144 	if (neigh->nud_state & NUD_VALID) {
2145 		char haddr[MAX_ADDR_LEN];
2146 
2147 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2148 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2149 			read_unlock_bh(&neigh->lock);
2150 			goto nla_put_failure;
2151 		}
2152 	}
2153 
2154 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2155 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2156 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2157 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2158 	read_unlock_bh(&neigh->lock);
2159 
2160 	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2161 	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2162 
2163 	return nlmsg_end(skb, nlh);
2164 
2165 nla_put_failure:
2166 	nlmsg_cancel(skb, nlh);
2167 	return -EMSGSIZE;
2168 }
2169 
2170 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2171 			    u32 pid, u32 seq, int type, unsigned int flags,
2172 			    struct neigh_table *tbl)
2173 {
2174 	struct nlmsghdr *nlh;
2175 	struct ndmsg *ndm;
2176 
2177 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2178 	if (nlh == NULL)
2179 		return -EMSGSIZE;
2180 
2181 	ndm = nlmsg_data(nlh);
2182 	ndm->ndm_family	 = tbl->family;
2183 	ndm->ndm_pad1    = 0;
2184 	ndm->ndm_pad2    = 0;
2185 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2186 	ndm->ndm_type	 = NDA_DST;
2187 	ndm->ndm_ifindex = pn->dev->ifindex;
2188 	ndm->ndm_state	 = NUD_NONE;
2189 
2190 	NLA_PUT(skb, NDA_DST, tbl->key_len, pn->key);
2191 
2192 	return nlmsg_end(skb, nlh);
2193 
2194 nla_put_failure:
2195 	nlmsg_cancel(skb, nlh);
2196 	return -EMSGSIZE;
2197 }
2198 
2199 static void neigh_update_notify(struct neighbour *neigh)
2200 {
2201 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2202 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2203 }
2204 
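/*
 * neigh_dump_table() walks the RCU-protected hash table without taking
 * tbl->lock: cb->args[1] remembers the bucket and cb->args[2] the index
 * within the bucket so an interrupted dump resumes where it stopped.
 * As usual for netlink dumps this is a best-effort snapshot; entries can
 * be missed or repeated if the table is resized between invocations.
 */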
2205 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2206 			    struct netlink_callback *cb)
2207 {
2208 	struct net *net = sock_net(skb->sk);
2209 	struct neighbour *n;
2210 	int rc, h, s_h = cb->args[1];
2211 	int idx, s_idx = idx = cb->args[2];
2212 	struct neigh_hash_table *nht;
2213 
2214 	rcu_read_lock_bh();
2215 	nht = rcu_dereference_bh(tbl->nht);
2216 
2217 	for (h = 0; h < (1 << nht->hash_shift); h++) {
2218 		if (h < s_h)
2219 			continue;
2220 		if (h > s_h)
2221 			s_idx = 0;
2222 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2223 		     n != NULL;
2224 		     n = rcu_dereference_bh(n->next)) {
2225 			if (!net_eq(dev_net(n->dev), net))
2226 				continue;
2227 			if (idx < s_idx)
2228 				goto next;
2229 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2230 					    cb->nlh->nlmsg_seq,
2231 					    RTM_NEWNEIGH,
2232 					    NLM_F_MULTI) <= 0) {
2233 				rc = -1;
2234 				goto out;
2235 			}
2236 next:
2237 			idx++;
2238 		}
2239 	}
2240 	rc = skb->len;
2241 out:
2242 	rcu_read_unlock_bh();
2243 	cb->args[1] = h;
2244 	cb->args[2] = idx;
2245 	return rc;
2246 }
2247 
2248 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2249 			     struct netlink_callback *cb)
2250 {
2251 	struct pneigh_entry *n;
2252 	struct net *net = sock_net(skb->sk);
2253 	int rc, h, s_h = cb->args[3];
2254 	int idx, s_idx = idx = cb->args[4];
2255 
2256 	read_lock_bh(&tbl->lock);
2257 
2258 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
2259 		if (h < s_h)
2260 			continue;
2261 		if (h > s_h)
2262 			s_idx = 0;
2263 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2264 			if (!net_eq(dev_net(n->dev), net))
2265 				continue;
2266 			if (idx < s_idx)
2267 				goto next;
2268 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2269 					    cb->nlh->nlmsg_seq,
2270 					    RTM_NEWNEIGH,
2271 					    NLM_F_MULTI, tbl) <= 0) {
2272 				read_unlock_bh(&tbl->lock);
2273 				rc = -1;
2274 				goto out;
2275 			}
2276 		next:
2277 			idx++;
2278 		}
2279 	}
2280 
2281 	read_unlock_bh(&tbl->lock);
2282 	rc = skb->len;
2283 out:
2284 	cb->args[3] = h;
2285 	cb->args[4] = idx;
2286 	return rc;
2288 }
2289 
2290 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2291 {
2292 	struct neigh_table *tbl;
2293 	int t, family, s_t;
2294 	int proxy = 0;
2295 	int err = 0;
2296 
2297 	read_lock(&neigh_tbl_lock);
2298 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2299 
2300 	/* Check whether a full ndmsg structure is present; the family
2301 	 * member is at the same offset in both rtgenmsg and ndmsg.
2302 	 */
2303 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2304 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2305 		proxy = 1;
2306 
2307 	s_t = cb->args[0];
2308 
2309 	for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
2310 	     tbl = tbl->next, t++) {
2311 		if (t < s_t || (family && tbl->family != family))
2312 			continue;
2313 		if (t > s_t)
2314 			memset(&cb->args[1], 0, sizeof(cb->args) -
2315 						sizeof(cb->args[0]));
2316 		if (proxy)
2317 			err = pneigh_dump_table(tbl, skb, cb);
2318 		else
2319 			err = neigh_dump_table(tbl, skb, cb);
2320 	}
2321 	read_unlock(&neigh_tbl_lock);
2322 
2323 	cb->args[0] = t;
2324 	return skb->len;
2325 }
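/*
 * When the request carries a full ndmsg with ndm_flags == NTF_PROXY,
 * neigh_dump_info() dumps the proxy (pneigh) tables instead of the
 * regular cache; this is what, for example, "ip neigh show proxy"
 * relies on (userspace command named for illustration).
 */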
2326 
2327 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2328 {
2329 	int chain;
2330 	struct neigh_hash_table *nht;
2331 
2332 	rcu_read_lock_bh();
2333 	nht = rcu_dereference_bh(tbl->nht);
2334 
2335 	read_lock(&tbl->lock); /* avoid resizes */
2336 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2337 		struct neighbour *n;
2338 
2339 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2340 		     n != NULL;
2341 		     n = rcu_dereference_bh(n->next))
2342 			cb(n, cookie);
2343 	}
2344 	read_unlock(&tbl->lock);
2345 	rcu_read_unlock_bh();
2346 }
2347 EXPORT_SYMBOL(neigh_for_each);
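/*
 * Example use of neigh_for_each() (hypothetical caller, shown for
 * illustration only): count the entries of a table through a callback
 * and an opaque cookie.  The callback runs with BH disabled and
 * tbl->lock held for reading, so it must not sleep or mutate the table:
 *
 *	static void count_one(struct neighbour *n, void *cookie)
 *	{
 *		(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int count = 0;
 *	neigh_for_each(&arp_tbl, count_one, &count);
 */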
2348 
2349 /* The tbl->lock must be held as a writer and BH disabled. */
2350 void __neigh_for_each_release(struct neigh_table *tbl,
2351 			      int (*cb)(struct neighbour *))
2352 {
2353 	int chain;
2354 	struct neigh_hash_table *nht;
2355 
2356 	nht = rcu_dereference_protected(tbl->nht,
2357 					lockdep_is_held(&tbl->lock));
2358 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2359 		struct neighbour *n;
2360 		struct neighbour __rcu **np;
2361 
2362 		np = &nht->hash_buckets[chain];
2363 		while ((n = rcu_dereference_protected(*np,
2364 					lockdep_is_held(&tbl->lock))) != NULL) {
2365 			int release;
2366 
2367 			write_lock(&n->lock);
2368 			release = cb(n);
2369 			if (release) {
2370 				rcu_assign_pointer(*np,
2371 					rcu_dereference_protected(n->next,
2372 						lockdep_is_held(&tbl->lock)));
2373 				n->dead = 1;
2374 			} else
2375 				np = &n->next;
2376 			write_unlock(&n->lock);
2377 			if (release)
2378 				neigh_cleanup_and_release(n);
2379 		}
2380 	}
2381 }
2382 EXPORT_SYMBOL(__neigh_for_each_release);
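/*
 * Note the traversal idiom above: np always points at the link that
 * leads to the current entry (initially the bucket head), so unlinking
 * is a single rcu_assign_pointer() of the successor into *np, while a
 * kept entry merely advances np to &n->next.  Readers traversing under
 * RCU therefore never see a broken chain.
 */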
2383 
2384 #ifdef CONFIG_PROC_FS
2385 
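/*
 * The seq_file helpers below iterate a neighbour table for /proc
 * output.  Iteration covers the RCU hash table first (neigh_get_*) and
 * then, unless NEIGH_SEQ_NEIGH_ONLY is set, chains into the proxy
 * entries (pneigh_get_*); NEIGH_SEQ_IS_PNEIGH in state->flags records
 * which phase the cursor is in across read() calls.
 */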
2386 static struct neighbour *neigh_get_first(struct seq_file *seq)
2387 {
2388 	struct neigh_seq_state *state = seq->private;
2389 	struct net *net = seq_file_net(seq);
2390 	struct neigh_hash_table *nht = state->nht;
2391 	struct neighbour *n = NULL;
2392 	int bucket = state->bucket;
2393 
2394 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2395 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2396 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2397 
2398 		while (n) {
2399 			if (!net_eq(dev_net(n->dev), net))
2400 				goto next;
2401 			if (state->neigh_sub_iter) {
2402 				loff_t fakep = 0;
2403 				void *v;
2404 
2405 				v = state->neigh_sub_iter(state, n, &fakep);
2406 				if (!v)
2407 					goto next;
2408 			}
2409 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2410 				break;
2411 			if (n->nud_state & ~NUD_NOARP)
2412 				break;
2413 next:
2414 			n = rcu_dereference_bh(n->next);
2415 		}
2416 
2417 		if (n)
2418 			break;
2419 	}
2420 	state->bucket = bucket;
2421 
2422 	return n;
2423 }
2424 
2425 static struct neighbour *neigh_get_next(struct seq_file *seq,
2426 					struct neighbour *n,
2427 					loff_t *pos)
2428 {
2429 	struct neigh_seq_state *state = seq->private;
2430 	struct net *net = seq_file_net(seq);
2431 	struct neigh_hash_table *nht = state->nht;
2432 
2433 	if (state->neigh_sub_iter) {
2434 		void *v = state->neigh_sub_iter(state, n, pos);
2435 		if (v)
2436 			return n;
2437 	}
2438 	n = rcu_dereference_bh(n->next);
2439 
2440 	while (1) {
2441 		while (n) {
2442 			if (!net_eq(dev_net(n->dev), net))
2443 				goto next;
2444 			if (state->neigh_sub_iter) {
2445 				void *v = state->neigh_sub_iter(state, n, pos);
2446 				if (v)
2447 					return n;
2448 				goto next;
2449 			}
2450 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2451 				break;
2452 
2453 			if (n->nud_state & ~NUD_NOARP)
2454 				break;
2455 next:
2456 			n = rcu_dereference_bh(n->next);
2457 		}
2458 
2459 		if (n)
2460 			break;
2461 
2462 		if (++state->bucket >= (1 << nht->hash_shift))
2463 			break;
2464 
2465 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2466 	}
2467 
2468 	if (n && pos)
2469 		--(*pos);
2470 	return n;
2471 }
2472 
2473 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2474 {
2475 	struct neighbour *n = neigh_get_first(seq);
2476 
2477 	if (n) {
2478 		--(*pos);
2479 		while (*pos) {
2480 			n = neigh_get_next(seq, n, pos);
2481 			if (!n)
2482 				break;
2483 		}
2484 	}
2485 	return *pos ? NULL : n;
2486 }
2487 
2488 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2489 {
2490 	struct neigh_seq_state *state = seq->private;
2491 	struct net *net = seq_file_net(seq);
2492 	struct neigh_table *tbl = state->tbl;
2493 	struct pneigh_entry *pn = NULL;
2494 	int bucket = state->bucket;
2495 
2496 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2497 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2498 		pn = tbl->phash_buckets[bucket];
2499 		while (pn && !net_eq(pneigh_net(pn), net))
2500 			pn = pn->next;
2501 		if (pn)
2502 			break;
2503 	}
2504 	state->bucket = bucket;
2505 
2506 	return pn;
2507 }
2508 
2509 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2510 					    struct pneigh_entry *pn,
2511 					    loff_t *pos)
2512 {
2513 	struct neigh_seq_state *state = seq->private;
2514 	struct net *net = seq_file_net(seq);
2515 	struct neigh_table *tbl = state->tbl;
2516 
2517 	do {
2518 		pn = pn->next;
2519 	} while (pn && !net_eq(pneigh_net(pn), net));
2520 
2521 	while (!pn) {
2522 		if (++state->bucket > PNEIGH_HASHMASK)
2523 			break;
2524 		pn = tbl->phash_buckets[state->bucket];
2525 		while (pn && !net_eq(pneigh_net(pn), net))
2526 			pn = pn->next;
2527 		if (pn)
2528 			break;
2529 	}
2530 
2531 	if (pn && pos)
2532 		--(*pos);
2533 
2534 	return pn;
2535 }
2536 
2537 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2538 {
2539 	struct pneigh_entry *pn = pneigh_get_first(seq);
2540 
2541 	if (pn) {
2542 		--(*pos);
2543 		while (*pos) {
2544 			pn = pneigh_get_next(seq, pn, pos);
2545 			if (!pn)
2546 				break;
2547 		}
2548 	}
2549 	return *pos ? NULL : pn;
2550 }
2551 
2552 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2553 {
2554 	struct neigh_seq_state *state = seq->private;
2555 	void *rc;
2556 	loff_t idxpos = *pos;
2557 
2558 	rc = neigh_get_idx(seq, &idxpos);
2559 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2560 		rc = pneigh_get_idx(seq, &idxpos);
2561 
2562 	return rc;
2563 }
2564 
2565 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2566 	__acquires(rcu_bh)
2567 {
2568 	struct neigh_seq_state *state = seq->private;
2569 
2570 	state->tbl = tbl;
2571 	state->bucket = 0;
2572 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2573 
2574 	rcu_read_lock_bh();
2575 	state->nht = rcu_dereference_bh(tbl->nht);
2576 
2577 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2578 }
2579 EXPORT_SYMBOL(neigh_seq_start);
2580 
2581 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2582 {
2583 	struct neigh_seq_state *state;
2584 	void *rc;
2585 
2586 	if (v == SEQ_START_TOKEN) {
2587 		rc = neigh_get_first(seq);
2588 		goto out;
2589 	}
2590 
2591 	state = seq->private;
2592 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2593 		rc = neigh_get_next(seq, v, NULL);
2594 		if (rc)
2595 			goto out;
2596 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2597 			rc = pneigh_get_first(seq);
2598 	} else {
2599 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2600 		rc = pneigh_get_next(seq, v, NULL);
2601 	}
2602 out:
2603 	++(*pos);
2604 	return rc;
2605 }
2606 EXPORT_SYMBOL(neigh_seq_next);
2607 
2608 void neigh_seq_stop(struct seq_file *seq, void *v)
2609 	__releases(rcu_bh)
2610 {
2611 	rcu_read_unlock_bh();
2612 }
2613 EXPORT_SYMBOL(neigh_seq_stop);
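/*
 * A protocol wires these iterators into its own seq_operations.  Sketch
 * modelled on how net/ipv4/arp.c consumes this API (abbreviated, for
 * illustration):
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next() and neigh_seq_stop() plugged in as the .next
 * and .stop callbacks.
 */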
2614 
2615 /* statistics via seq_file */
2616 
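/*
 * The *pos cursor for the statistics file encodes "cpu + 1": position 0
 * yields SEQ_START_TOKEN (the header line) and position n selects the
 * per-CPU counters of possible CPU n - 1, skipping CPU ids that are not
 * possible on this system.
 */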
2617 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2618 {
2619 	struct neigh_table *tbl = seq->private;
2620 	int cpu;
2621 
2622 	if (*pos == 0)
2623 		return SEQ_START_TOKEN;
2624 
2625 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2626 		if (!cpu_possible(cpu))
2627 			continue;
2628 		*pos = cpu+1;
2629 		return per_cpu_ptr(tbl->stats, cpu);
2630 	}
2631 	return NULL;
2632 }
2633 
2634 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2635 {
2636 	struct neigh_table *tbl = seq->private;
2637 	int cpu;
2638 
2639 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2640 		if (!cpu_possible(cpu))
2641 			continue;
2642 		*pos = cpu+1;
2643 		return per_cpu_ptr(tbl->stats, cpu);
2644 	}
2645 	return NULL;
2646 }
2647 
2648 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2649 {
2650 
2651 }
2652 
2653 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2654 {
2655 	struct neigh_table *tbl = seq->private;
2656 	struct neigh_statistics *st = v;
2657 
2658 	if (v == SEQ_START_TOKEN) {
2659 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2660 		return 0;
2661 	}
2662 
2663 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2664 			"%08lx %08lx  %08lx %08lx %08lx\n",
2665 		   atomic_read(&tbl->entries),
2666 
2667 		   st->allocs,
2668 		   st->destroys,
2669 		   st->hash_grows,
2670 
2671 		   st->lookups,
2672 		   st->hits,
2673 
2674 		   st->res_failed,
2675 
2676 		   st->rcv_probes_mcast,
2677 		   st->rcv_probes_ucast,
2678 
2679 		   st->periodic_gc_runs,
2680 		   st->forced_gc_runs,
2681 		   st->unres_discards
2682 		   );
2683 
2684 	return 0;
2685 }
2686 
2687 static const struct seq_operations neigh_stat_seq_ops = {
2688 	.start	= neigh_stat_seq_start,
2689 	.next	= neigh_stat_seq_next,
2690 	.stop	= neigh_stat_seq_stop,
2691 	.show	= neigh_stat_seq_show,
2692 };
2693 
2694 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2695 {
2696 	int ret = seq_open(file, &neigh_stat_seq_ops);
2697 
2698 	if (!ret) {
2699 		struct seq_file *sf = file->private_data;
2700 		sf->private = PDE(inode)->data;
2701 	}
2702 	return ret;
2703 }
2704 
2705 static const struct file_operations neigh_stat_seq_fops = {
2706 	.owner	 = THIS_MODULE,
2707 	.open 	 = neigh_stat_seq_open,
2708 	.read	 = seq_read,
2709 	.llseek	 = seq_lseek,
2710 	.release = seq_release,
2711 };
2712 
2713 #endif /* CONFIG_PROC_FS */
2714 
2715 static inline size_t neigh_nlmsg_size(void)
2716 {
2717 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2718 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2719 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2720 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2721 	       + nla_total_size(4); /* NDA_PROBES */
2722 }
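/*
 * neigh_nlmsg_size() computes a worst-case allocation for one neighbour
 * notification: the aligned ndmsg header plus the aligned attribute
 * sizes, using MAX_ADDR_LEN as an upper bound for both NDA_DST and
 * NDA_LLADDR since the real key/lladdr lengths are not known until the
 * message is filled.
 */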
2723 
2724 static void __neigh_notify(struct neighbour *n, int type, int flags)
2725 {
2726 	struct net *net = dev_net(n->dev);
2727 	struct sk_buff *skb;
2728 	int err = -ENOBUFS;
2729 
2730 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2731 	if (skb == NULL)
2732 		goto errout;
2733 
2734 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2735 	if (err < 0) {
2736 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2737 		WARN_ON(err == -EMSGSIZE);
2738 		kfree_skb(skb);
2739 		goto errout;
2740 	}
2741 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2742 	return;
2743 errout:
2744 	if (err < 0)
2745 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2746 }
2747 
2748 #ifdef CONFIG_ARPD
2749 void neigh_app_ns(struct neighbour *n)
2750 {
2751 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2752 }
2753 EXPORT_SYMBOL(neigh_app_ns);
2754 #endif /* CONFIG_ARPD */
2755 
2756 #ifdef CONFIG_SYSCTL
2757 
2758 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2759 			   size_t *lenp, loff_t *ppos)
2760 {
2761 	int size, ret;
2762 	ctl_table tmp = *ctl;
2763 
2764 	tmp.data = &size;
2765 	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2766 	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2767 	if (write && !ret)
2768 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2769 	return ret;
2770 }
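/*
 * proc_unres_qlen() keeps the legacy packet-count interface on top of
 * the byte-based limit: reads divide queue_len_bytes by the true size
 * of a worst-case Ethernet frame, and writes multiply back, so e.g.
 * writing 3 stores 3 * SKB_TRUESIZE(ETH_FRAME_LEN) bytes (the exact
 * byte value depends on the skb overhead of the architecture).
 */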
2771 
2772 enum {
2773 	NEIGH_VAR_MCAST_PROBE,
2774 	NEIGH_VAR_UCAST_PROBE,
2775 	NEIGH_VAR_APP_PROBE,
2776 	NEIGH_VAR_RETRANS_TIME,
2777 	NEIGH_VAR_BASE_REACHABLE_TIME,
2778 	NEIGH_VAR_DELAY_PROBE_TIME,
2779 	NEIGH_VAR_GC_STALETIME,
2780 	NEIGH_VAR_QUEUE_LEN,
2781 	NEIGH_VAR_QUEUE_LEN_BYTES,
2782 	NEIGH_VAR_PROXY_QLEN,
2783 	NEIGH_VAR_ANYCAST_DELAY,
2784 	NEIGH_VAR_PROXY_DELAY,
2785 	NEIGH_VAR_LOCKTIME,
2786 	NEIGH_VAR_RETRANS_TIME_MS,
2787 	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2788 	NEIGH_VAR_GC_INTERVAL,
2789 	NEIGH_VAR_GC_THRESH1,
2790 	NEIGH_VAR_GC_THRESH2,
2791 	NEIGH_VAR_GC_THRESH3,
2792 	NEIGH_VAR_MAX
2793 };
2794 
2795 static struct neigh_sysctl_table {
2796 	struct ctl_table_header *sysctl_header;
2797 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2798 	char *dev_name;
2799 } neigh_sysctl_template __read_mostly = {
2800 	.neigh_vars = {
2801 		[NEIGH_VAR_MCAST_PROBE] = {
2802 			.procname	= "mcast_solicit",
2803 			.maxlen		= sizeof(int),
2804 			.mode		= 0644,
2805 			.proc_handler	= proc_dointvec,
2806 		},
2807 		[NEIGH_VAR_UCAST_PROBE] = {
2808 			.procname	= "ucast_solicit",
2809 			.maxlen		= sizeof(int),
2810 			.mode		= 0644,
2811 			.proc_handler	= proc_dointvec,
2812 		},
2813 		[NEIGH_VAR_APP_PROBE] = {
2814 			.procname	= "app_solicit",
2815 			.maxlen		= sizeof(int),
2816 			.mode		= 0644,
2817 			.proc_handler	= proc_dointvec,
2818 		},
2819 		[NEIGH_VAR_RETRANS_TIME] = {
2820 			.procname	= "retrans_time",
2821 			.maxlen		= sizeof(int),
2822 			.mode		= 0644,
2823 			.proc_handler	= proc_dointvec_userhz_jiffies,
2824 		},
2825 		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
2826 			.procname	= "base_reachable_time",
2827 			.maxlen		= sizeof(int),
2828 			.mode		= 0644,
2829 			.proc_handler	= proc_dointvec_jiffies,
2830 		},
2831 		[NEIGH_VAR_DELAY_PROBE_TIME] = {
2832 			.procname	= "delay_first_probe_time",
2833 			.maxlen		= sizeof(int),
2834 			.mode		= 0644,
2835 			.proc_handler	= proc_dointvec_jiffies,
2836 		},
2837 		[NEIGH_VAR_GC_STALETIME] = {
2838 			.procname	= "gc_stale_time",
2839 			.maxlen		= sizeof(int),
2840 			.mode		= 0644,
2841 			.proc_handler	= proc_dointvec_jiffies,
2842 		},
2843 		[NEIGH_VAR_QUEUE_LEN] = {
2844 			.procname	= "unres_qlen",
2845 			.maxlen		= sizeof(int),
2846 			.mode		= 0644,
2847 			.proc_handler	= proc_unres_qlen,
2848 		},
2849 		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
2850 			.procname	= "unres_qlen_bytes",
2851 			.maxlen		= sizeof(int),
2852 			.mode		= 0644,
2853 			.proc_handler	= proc_dointvec,
2854 		},
2855 		[NEIGH_VAR_PROXY_QLEN] = {
2856 			.procname	= "proxy_qlen",
2857 			.maxlen		= sizeof(int),
2858 			.mode		= 0644,
2859 			.proc_handler	= proc_dointvec,
2860 		},
2861 		[NEIGH_VAR_ANYCAST_DELAY] = {
2862 			.procname	= "anycast_delay",
2863 			.maxlen		= sizeof(int),
2864 			.mode		= 0644,
2865 			.proc_handler	= proc_dointvec_userhz_jiffies,
2866 		},
2867 		[NEIGH_VAR_PROXY_DELAY] = {
2868 			.procname	= "proxy_delay",
2869 			.maxlen		= sizeof(int),
2870 			.mode		= 0644,
2871 			.proc_handler	= proc_dointvec_userhz_jiffies,
2872 		},
2873 		[NEIGH_VAR_LOCKTIME] = {
2874 			.procname	= "locktime",
2875 			.maxlen		= sizeof(int),
2876 			.mode		= 0644,
2877 			.proc_handler	= proc_dointvec_userhz_jiffies,
2878 		},
2879 		[NEIGH_VAR_RETRANS_TIME_MS] = {
2880 			.procname	= "retrans_time_ms",
2881 			.maxlen		= sizeof(int),
2882 			.mode		= 0644,
2883 			.proc_handler	= proc_dointvec_ms_jiffies,
2884 		},
2885 		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2886 			.procname	= "base_reachable_time_ms",
2887 			.maxlen		= sizeof(int),
2888 			.mode		= 0644,
2889 			.proc_handler	= proc_dointvec_ms_jiffies,
2890 		},
2891 		[NEIGH_VAR_GC_INTERVAL] = {
2892 			.procname	= "gc_interval",
2893 			.maxlen		= sizeof(int),
2894 			.mode		= 0644,
2895 			.proc_handler	= proc_dointvec_jiffies,
2896 		},
2897 		[NEIGH_VAR_GC_THRESH1] = {
2898 			.procname	= "gc_thresh1",
2899 			.maxlen		= sizeof(int),
2900 			.mode		= 0644,
2901 			.proc_handler	= proc_dointvec,
2902 		},
2903 		[NEIGH_VAR_GC_THRESH2] = {
2904 			.procname	= "gc_thresh2",
2905 			.maxlen		= sizeof(int),
2906 			.mode		= 0644,
2907 			.proc_handler	= proc_dointvec,
2908 		},
2909 		[NEIGH_VAR_GC_THRESH3] = {
2910 			.procname	= "gc_thresh3",
2911 			.maxlen		= sizeof(int),
2912 			.mode		= 0644,
2913 			.proc_handler	= proc_dointvec,
2914 		},
2915 		{},
2916 	},
2917 };
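/*
 * The template above is cloned per neigh_parms by
 * neigh_sysctl_register() and published under
 * net.<proto>.neigh.<device|default>.<var>, e.g.
 * /proc/sys/net/ipv4/neigh/default/gc_thresh1 (path shown for the ARP
 * case; <proto> comes from the caller's p_name).
 */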
2918 
2919 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2920 			  char *p_name, proc_handler *handler)
2921 {
2922 	struct neigh_sysctl_table *t;
2923 	const char *dev_name_source = NULL;
2924 
2925 #define NEIGH_CTL_PATH_ROOT	0
2926 #define NEIGH_CTL_PATH_PROTO	1
2927 #define NEIGH_CTL_PATH_NEIGH	2
2928 #define NEIGH_CTL_PATH_DEV	3
2929 
2930 	struct ctl_path neigh_path[] = {
2931 		{ .procname = "net",	 },
2932 		{ .procname = "proto",	 },
2933 		{ .procname = "neigh",	 },
2934 		{ .procname = "default", },
2935 		{ },
2936 	};
2937 
2938 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2939 	if (!t)
2940 		goto err;
2941 
2942 	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2943 	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2944 	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2945 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2946 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2947 	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2948 	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2949 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2950 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2951 	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2952 	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2953 	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2954 	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2955 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2956 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2957 
2958 	if (dev) {
2959 		dev_name_source = dev->name;
2960 		/* Terminate the table early */
2961 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2962 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2963 	} else {
2964 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2965 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2966 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2967 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2968 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2969 	}
2970 
2972 	if (handler) {
2973 		/* RetransTime */
2974 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2975 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2976 		/* ReachableTime */
2977 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2978 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2979 		/* RetransTime (in milliseconds)*/
2980 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2981 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2982 		/* ReachableTime (in milliseconds) */
2983 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2984 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2985 	}
2986 
2987 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2988 	if (!t->dev_name)
2989 		goto free;
2990 
2991 	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2992 	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2993 
2994 	t->sysctl_header =
2995 		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2996 	if (!t->sysctl_header)
2997 		goto free_procname;
2998 
2999 	p->sysctl_table = t;
3000 	return 0;
3001 
3002 free_procname:
3003 	kfree(t->dev_name);
3004 free:
3005 	kfree(t);
3006 err:
3007 	return -ENOBUFS;
3008 }
3009 EXPORT_SYMBOL(neigh_sysctl_register);
3010 
3011 void neigh_sysctl_unregister(struct neigh_parms *p)
3012 {
3013 	if (p->sysctl_table) {
3014 		struct neigh_sysctl_table *t = p->sysctl_table;
3015 		p->sysctl_table = NULL;
3016 		unregister_sysctl_table(t->sysctl_header);
3017 		kfree(t->dev_name);
3018 		kfree(t);
3019 	}
3020 }
3021 EXPORT_SYMBOL(neigh_sysctl_unregister);
3022 
3023 #endif	/* CONFIG_SYSCTL */
3024 
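/*
 * Wire the RTM_*NEIGH and RTM_*NEIGHTBL message types to their
 * handlers.  rtnl_register() takes a doit callback for requests, a
 * dumpit callback for NLM_F_DUMP requests, and a calcit callback for
 * sizing (unused here); RTM_GETNEIGH and RTM_GETNEIGHTBL are dump-only,
 * hence the NULL doit.
 */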
3025 static int __init neigh_init(void)
3026 {
3027 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3028 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3029 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3030 
3031 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3032 		      NULL);
3033 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3034 
3035 	return 0;
3036 }
3037 
3038 subsys_initcall(neigh_init);
3039 
3040