xref: /openbmc/linux/net/core/neighbour.c (revision b595076a)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 
40 #define NEIGH_DEBUG 1
41 
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK0 NEIGH_PRINTK
45 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
46 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
47 
48 #if NEIGH_DEBUG >= 1
49 #undef NEIGH_PRINTK1
50 #define NEIGH_PRINTK1 NEIGH_PRINTK
51 #endif
52 #if NEIGH_DEBUG >= 2
53 #undef NEIGH_PRINTK2
54 #define NEIGH_PRINTK2 NEIGH_PRINTK
55 #endif
56 
57 #define PNEIGH_HASHMASK		0xF
58 
59 static void neigh_timer_handler(unsigned long arg);
60 static void __neigh_notify(struct neighbour *n, int type, int flags);
61 static void neigh_update_notify(struct neighbour *neigh);
62 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
63 
64 static struct neigh_table *neigh_tables;
65 #ifdef CONFIG_PROC_FS
66 static const struct file_operations neigh_stat_seq_fops;
67 #endif
68 
69 /*
70    Neighbour hash table buckets are protected with the rwlock tbl->lock.
71 
72    - All scans of and updates to the hash buckets MUST be made under this lock.
73    - NOTHING clever should be done under this lock: no callbacks
74      into protocol backends, no attempts to send anything to the network.
75      That would deadlock if the backend/driver wants to use the
76      neighbour cache.
77    - If an entry requires some non-trivial action, increase
78      its reference count and release the table lock.
79 
80    Neighbour entries are protected:
81    - by their reference count.
82    - by the rwlock neigh->lock
83 
84    The reference count prevents destruction.
85 
86    neigh->lock mainly serializes the link-layer address data and its
87    validity state. However, the same lock also protects other entry fields:
88     - the timer
89     - the resolution queue
90 
91    Again, nothing clever shall be done under neigh->lock;
92    the most complicated procedure we allow there is dev->hard_header.
93    It is assumed that dev->hard_header is simple and makes no
94    callbacks into the neighbour tables.
95 
96    The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
97    the list of neighbour tables. This list is used only in process context.
98  */
99 
100 static DEFINE_RWLOCK(neigh_tbl_lock);
101 
102 static int neigh_blackhole(struct sk_buff *skb)
103 {
104 	kfree_skb(skb);
105 	return -ENETDOWN;
106 }
107 
108 static void neigh_cleanup_and_release(struct neighbour *neigh)
109 {
110 	if (neigh->parms->neigh_cleanup)
111 		neigh->parms->neigh_cleanup(neigh);
112 
113 	__neigh_notify(neigh, RTM_DELNEIGH, 0);
114 	neigh_release(neigh);
115 }
116 
117 /*
118  * Returns a value randomly distributed in the interval
119  * (1/2)*base...(3/2)*base. This corresponds to the default IPv6 behaviour
120  * and is not overridable, because it is a genuinely reasonable choice.
121  */
122 
123 unsigned long neigh_rand_reach_time(unsigned long base)
124 {
125 	return base ? (net_random() % base) + (base >> 1) : 0;
126 }
127 EXPORT_SYMBOL(neigh_rand_reach_time);
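
/* Worked example: with base = 30 * HZ, net_random() % base is in
 * [0, 30*HZ) and adding base >> 1 shifts that to [15*HZ, 45*HZ) --
 * i.e. half a minute, plus or minus fifty percent.
 */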
128 
129 
130 static int neigh_forced_gc(struct neigh_table *tbl)
131 {
132 	int shrunk = 0;
133 	int i;
134 	struct neigh_hash_table *nht;
135 
136 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
137 
138 	write_lock_bh(&tbl->lock);
139 	nht = rcu_dereference_protected(tbl->nht,
140 					lockdep_is_held(&tbl->lock));
141 	for (i = 0; i <= nht->hash_mask; i++) {
142 		struct neighbour *n;
143 		struct neighbour __rcu **np;
144 
145 		np = &nht->hash_buckets[i];
146 		while ((n = rcu_dereference_protected(*np,
147 					lockdep_is_held(&tbl->lock))) != NULL) {
148 			/* Neighbour record may be discarded if:
149 			 * - nobody refers to it.
150 			 * - it is not permanent
151 			 */
152 			write_lock(&n->lock);
153 			if (atomic_read(&n->refcnt) == 1 &&
154 			    !(n->nud_state & NUD_PERMANENT)) {
155 				rcu_assign_pointer(*np,
156 					rcu_dereference_protected(n->next,
157 						  lockdep_is_held(&tbl->lock)));
158 				n->dead = 1;
159 				shrunk	= 1;
160 				write_unlock(&n->lock);
161 				neigh_cleanup_and_release(n);
162 				continue;
163 			}
164 			write_unlock(&n->lock);
165 			np = &n->next;
166 		}
167 	}
168 
169 	tbl->last_flush = jiffies;
170 
171 	write_unlock_bh(&tbl->lock);
172 
173 	return shrunk;
174 }
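
/* Note: neigh_forced_gc() is triggered from neigh_alloc() once the table
 * crosses gc_thresh2/gc_thresh3; it returns 1 if at least one entry was
 * reclaimed, 0 otherwise.
 */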
175 
176 static void neigh_add_timer(struct neighbour *n, unsigned long when)
177 {
178 	neigh_hold(n);
179 	if (unlikely(mod_timer(&n->timer, when))) {
180 		printk("NEIGH: BUG, double timer add, state is %x\n",
181 		       n->nud_state);
182 		dump_stack();
183 	}
184 }
185 
186 static int neigh_del_timer(struct neighbour *n)
187 {
188 	if ((n->nud_state & NUD_IN_TIMER) &&
189 	    del_timer(&n->timer)) {
190 		neigh_release(n);
191 		return 1;
192 	}
193 	return 0;
194 }
195 
196 static void pneigh_queue_purge(struct sk_buff_head *list)
197 {
198 	struct sk_buff *skb;
199 
200 	while ((skb = skb_dequeue(list)) != NULL) {
201 		dev_put(skb->dev);
202 		kfree_skb(skb);
203 	}
204 }
205 
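/* Unhash every entry belonging to @dev (or all entries if @dev is NULL).
 * The caller must hold tbl->lock for writing; both callers below take it
 * via write_lock_bh().
 */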
206 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
207 {
208 	int i;
209 	struct neigh_hash_table *nht;
210 
211 	nht = rcu_dereference_protected(tbl->nht,
212 					lockdep_is_held(&tbl->lock));
213 
214 	for (i = 0; i <= nht->hash_mask; i++) {
215 		struct neighbour *n;
216 		struct neighbour __rcu **np = &nht->hash_buckets[i];
217 
218 		while ((n = rcu_dereference_protected(*np,
219 					lockdep_is_held(&tbl->lock))) != NULL) {
220 			if (dev && n->dev != dev) {
221 				np = &n->next;
222 				continue;
223 			}
224 			rcu_assign_pointer(*np,
225 				   rcu_dereference_protected(n->next,
226 						lockdep_is_held(&tbl->lock)));
227 			write_lock(&n->lock);
228 			neigh_del_timer(n);
229 			n->dead = 1;
230 
231 			if (atomic_read(&n->refcnt) != 1) {
232 				/* The most unpleasant situation:
233 				   we must destroy the neighbour entry,
234 				   but someone still uses it.
235 
236 				   The destroy will be delayed until
237 				   the last user releases us, but
238 				   we must kill the timers etc. and move
239 				   the entry to a safe state.
240 				 */
241 				skb_queue_purge(&n->arp_queue);
242 				n->output = neigh_blackhole;
243 				if (n->nud_state & NUD_VALID)
244 					n->nud_state = NUD_NOARP;
245 				else
246 					n->nud_state = NUD_NONE;
247 				NEIGH_PRINTK2("neigh %p is stray.\n", n);
248 			}
249 			write_unlock(&n->lock);
250 			neigh_cleanup_and_release(n);
251 		}
252 	}
253 }
254 
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257 	write_lock_bh(&tbl->lock);
258 	neigh_flush_dev(tbl, dev);
259 	write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262 
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265 	write_lock_bh(&tbl->lock);
266 	neigh_flush_dev(tbl, dev);
267 	pneigh_ifdown(tbl, dev);
268 	write_unlock_bh(&tbl->lock);
269 
270 	del_timer_sync(&tbl->proxy_timer);
271 	pneigh_queue_purge(&tbl->proxy_queue);
272 	return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275 
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
277 {
278 	struct neighbour *n = NULL;
279 	unsigned long now = jiffies;
280 	int entries;
281 
282 	entries = atomic_inc_return(&tbl->entries) - 1;
283 	if (entries >= tbl->gc_thresh3 ||
284 	    (entries >= tbl->gc_thresh2 &&
285 	     time_after(now, tbl->last_flush + 5 * HZ))) {
286 		if (!neigh_forced_gc(tbl) &&
287 		    entries >= tbl->gc_thresh3)
288 			goto out_entries;
289 	}
290 
291 	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
292 	if (!n)
293 		goto out_entries;
294 
295 	skb_queue_head_init(&n->arp_queue);
296 	rwlock_init(&n->lock);
297 	seqlock_init(&n->ha_lock);
298 	n->updated	  = n->used = now;
299 	n->nud_state	  = NUD_NONE;
300 	n->output	  = neigh_blackhole;
301 	n->parms	  = neigh_parms_clone(&tbl->parms);
302 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
303 
304 	NEIGH_CACHE_STAT_INC(tbl, allocs);
305 	n->tbl		  = tbl;
306 	atomic_set(&n->refcnt, 1);
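	/* Born dead: neigh_create() clears n->dead only after the entry
	 * is actually linked into the hash table. */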
307 	n->dead		  = 1;
308 out:
309 	return n;
310 
311 out_entries:
312 	atomic_dec(&tbl->entries);
313 	goto out;
314 }
315 
316 static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
317 {
318 	size_t size = entries * sizeof(struct neighbour *);
319 	struct neigh_hash_table *ret;
320 	struct neighbour **buckets;
321 
322 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323 	if (!ret)
324 		return NULL;
325 	if (size <= PAGE_SIZE)
326 		buckets = kzalloc(size, GFP_ATOMIC);
327 	else
328 		buckets = (struct neighbour **)
329 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330 					   get_order(size));
331 	if (!buckets) {
332 		kfree(ret);
333 		return NULL;
334 	}
335 	rcu_assign_pointer(ret->hash_buckets, buckets);
336 	ret->hash_mask = entries - 1;
337 	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
338 	return ret;
339 }
340 
341 static void neigh_hash_free_rcu(struct rcu_head *head)
342 {
343 	struct neigh_hash_table *nht = container_of(head,
344 						    struct neigh_hash_table,
345 						    rcu);
346 	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
347 	struct neighbour **buckets = nht->hash_buckets;
348 
349 	if (size <= PAGE_SIZE)
350 		kfree(buckets);
351 	else
352 		free_pages((unsigned long)buckets, get_order(size));
353 	kfree(nht);
354 }
355 
356 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
357 						unsigned long new_entries)
358 {
359 	unsigned int i, hash;
360 	struct neigh_hash_table *new_nht, *old_nht;
361 
362 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
363 
364 	BUG_ON(!is_power_of_2(new_entries));
365 	old_nht = rcu_dereference_protected(tbl->nht,
366 					    lockdep_is_held(&tbl->lock));
367 	new_nht = neigh_hash_alloc(new_entries);
368 	if (!new_nht)
369 		return old_nht;
370 
371 	for (i = 0; i <= old_nht->hash_mask; i++) {
372 		struct neighbour *n, *next;
373 
374 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
375 						   lockdep_is_held(&tbl->lock));
376 		     n != NULL;
377 		     n = next) {
378 			hash = tbl->hash(n->primary_key, n->dev,
379 					 new_nht->hash_rnd);
380 
381 			hash &= new_nht->hash_mask;
382 			next = rcu_dereference_protected(n->next,
383 						lockdep_is_held(&tbl->lock));
384 
385 			rcu_assign_pointer(n->next,
386 					   rcu_dereference_protected(
387 						new_nht->hash_buckets[hash],
388 						lockdep_is_held(&tbl->lock)));
389 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
390 		}
391 	}
392 
393 	rcu_assign_pointer(tbl->nht, new_nht);
394 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
395 	return new_nht;
396 }
397 
398 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
399 			       struct net_device *dev)
400 {
401 	struct neighbour *n;
402 	int key_len = tbl->key_len;
403 	u32 hash_val;
404 	struct neigh_hash_table *nht;
405 
406 	NEIGH_CACHE_STAT_INC(tbl, lookups);
407 
408 	rcu_read_lock_bh();
409 	nht = rcu_dereference_bh(tbl->nht);
410 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
411 
412 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
413 	     n != NULL;
414 	     n = rcu_dereference_bh(n->next)) {
415 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
416 			if (!atomic_inc_not_zero(&n->refcnt))
417 				n = NULL;
418 			NEIGH_CACHE_STAT_INC(tbl, hits);
419 			break;
420 		}
421 	}
422 
423 	rcu_read_unlock_bh();
424 	return n;
425 }
426 EXPORT_SYMBOL(neigh_lookup);
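
/* Illustrative caller pattern (hypothetical address variable):
 *
 *	n = neigh_lookup(&arp_tbl, &ip4addr, dev);
 *	if (n) {
 *		... read n->ha under n->lock or n->ha_lock ...
 *		neigh_release(n);
 *	}
 *
 * The lookup returns the entry with its reference count raised (or NULL),
 * so every successful lookup must be paired with neigh_release().
 */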
427 
428 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
429 				     const void *pkey)
430 {
431 	struct neighbour *n;
432 	int key_len = tbl->key_len;
433 	u32 hash_val;
434 	struct neigh_hash_table *nht;
435 
436 	NEIGH_CACHE_STAT_INC(tbl, lookups);
437 
438 	rcu_read_lock_bh();
439 	nht = rcu_dereference_bh(tbl->nht);
440 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
441 
442 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
443 	     n != NULL;
444 	     n = rcu_dereference_bh(n->next)) {
445 		if (!memcmp(n->primary_key, pkey, key_len) &&
446 		    net_eq(dev_net(n->dev), net)) {
447 			if (!atomic_inc_not_zero(&n->refcnt))
448 				n = NULL;
449 			NEIGH_CACHE_STAT_INC(tbl, hits);
450 			break;
451 		}
452 	}
453 
454 	rcu_read_unlock_bh();
455 	return n;
456 }
457 EXPORT_SYMBOL(neigh_lookup_nodev);
458 
459 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
460 			       struct net_device *dev)
461 {
462 	u32 hash_val;
463 	int key_len = tbl->key_len;
464 	int error;
465 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466 	struct neigh_hash_table *nht;
467 
468 	if (!n) {
469 		rc = ERR_PTR(-ENOBUFS);
470 		goto out;
471 	}
472 
473 	memcpy(n->primary_key, pkey, key_len);
474 	n->dev = dev;
475 	dev_hold(dev);
476 
477 	/* Protocol specific setup. */
478 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
479 		rc = ERR_PTR(error);
480 		goto out_neigh_release;
481 	}
482 
483 	/* Device specific setup. */
484 	if (n->parms->neigh_setup &&
485 	    (error = n->parms->neigh_setup(n)) < 0) {
486 		rc = ERR_PTR(error);
487 		goto out_neigh_release;
488 	}
489 
490 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
491 
492 	write_lock_bh(&tbl->lock);
493 	nht = rcu_dereference_protected(tbl->nht,
494 					lockdep_is_held(&tbl->lock));
495 
496 	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
497 		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
498 
499 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
500 
501 	if (n->parms->dead) {
502 		rc = ERR_PTR(-EINVAL);
503 		goto out_tbl_unlock;
504 	}
505 
506 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
507 					    lockdep_is_held(&tbl->lock));
508 	     n1 != NULL;
509 	     n1 = rcu_dereference_protected(n1->next,
510 			lockdep_is_held(&tbl->lock))) {
511 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
512 			neigh_hold(n1);
513 			rc = n1;
514 			goto out_tbl_unlock;
515 		}
516 	}
517 
518 	n->dead = 0;
519 	neigh_hold(n);
520 	rcu_assign_pointer(n->next,
521 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
522 						     lockdep_is_held(&tbl->lock)));
523 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
524 	write_unlock_bh(&tbl->lock);
525 	NEIGH_PRINTK2("neigh %p is created.\n", n);
526 	rc = n;
527 out:
528 	return rc;
529 out_tbl_unlock:
530 	write_unlock_bh(&tbl->lock);
531 out_neigh_release:
532 	neigh_release(n);
533 	goto out;
534 }
535 EXPORT_SYMBOL(neigh_create);
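
/* neigh_create() returns an ERR_PTR() on failure, never NULL, so callers
 * test with IS_ERR() -- typically via the __neigh_lookup_errno() wrapper,
 * as neigh_add() does below.
 */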
536 
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
540 	hash_val ^= (hash_val >> 16);
541 	hash_val ^= hash_val >> 8;
542 	hash_val ^= hash_val >> 4;
543 	hash_val &= PNEIGH_HASHMASK;
544 	return hash_val;
545 }
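
/* pneigh_hash() XOR-folds the last four bytes of the key down to four
 * bits, selecting one of PNEIGH_HASHMASK + 1 == 16 proxy buckets. For
 * IPv4, key_len is 4, so the whole address takes part in the hash.
 */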
546 
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548 					      struct net *net,
549 					      const void *pkey,
550 					      int key_len,
551 					      struct net_device *dev)
552 {
553 	while (n) {
554 		if (!memcmp(n->key, pkey, key_len) &&
555 		    net_eq(pneigh_net(n), net) &&
556 		    (n->dev == dev || !n->dev))
557 			return n;
558 		n = n->next;
559 	}
560 	return NULL;
561 }
562 
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564 		struct net *net, const void *pkey, struct net_device *dev)
565 {
566 	int key_len = tbl->key_len;
567 	u32 hash_val = pneigh_hash(pkey, key_len);
568 
569 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570 				 net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573 
574 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
575 				    struct net *net, const void *pkey,
576 				    struct net_device *dev, int creat)
577 {
578 	struct pneigh_entry *n;
579 	int key_len = tbl->key_len;
580 	u32 hash_val = pneigh_hash(pkey, key_len);
581 
582 	read_lock_bh(&tbl->lock);
583 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
584 			      net, pkey, key_len, dev);
585 	read_unlock_bh(&tbl->lock);
586 
587 	if (n || !creat)
588 		goto out;
589 
590 	ASSERT_RTNL();
591 
592 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
593 	if (!n)
594 		goto out;
595 
596 	write_pnet(&n->net, hold_net(net));
597 	memcpy(n->key, pkey, key_len);
598 	n->dev = dev;
599 	if (dev)
600 		dev_hold(dev);
601 
602 	if (tbl->pconstructor && tbl->pconstructor(n)) {
603 		if (dev)
604 			dev_put(dev);
605 		release_net(net);
606 		kfree(n);
607 		n = NULL;
608 		goto out;
609 	}
610 
611 	write_lock_bh(&tbl->lock);
612 	n->next = tbl->phash_buckets[hash_val];
613 	tbl->phash_buckets[hash_val] = n;
614 	write_unlock_bh(&tbl->lock);
615 out:
616 	return n;
617 }
618 EXPORT_SYMBOL(pneigh_lookup);
619 
620 
621 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
622 		  struct net_device *dev)
623 {
624 	struct pneigh_entry *n, **np;
625 	int key_len = tbl->key_len;
626 	u32 hash_val = pneigh_hash(pkey, key_len);
627 
628 	write_lock_bh(&tbl->lock);
629 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
630 	     np = &n->next) {
631 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
632 		    net_eq(pneigh_net(n), net)) {
633 			*np = n->next;
634 			write_unlock_bh(&tbl->lock);
635 			if (tbl->pdestructor)
636 				tbl->pdestructor(n);
637 			if (n->dev)
638 				dev_put(n->dev);
639 			release_net(pneigh_net(n));
640 			kfree(n);
641 			return 0;
642 		}
643 	}
644 	write_unlock_bh(&tbl->lock);
645 	return -ENOENT;
646 }
647 
648 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
649 {
650 	struct pneigh_entry *n, **np;
651 	u32 h;
652 
653 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
654 		np = &tbl->phash_buckets[h];
655 		while ((n = *np) != NULL) {
656 			if (!dev || n->dev == dev) {
657 				*np = n->next;
658 				if (tbl->pdestructor)
659 					tbl->pdestructor(n);
660 				if (n->dev)
661 					dev_put(n->dev);
662 				release_net(pneigh_net(n));
663 				kfree(n);
664 				continue;
665 			}
666 			np = &n->next;
667 		}
668 	}
669 	return -ENOENT;
670 }
671 
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673 
674 static inline void neigh_parms_put(struct neigh_parms *parms)
675 {
676 	if (atomic_dec_and_test(&parms->refcnt))
677 		neigh_parms_destroy(parms);
678 }
679 
680 static void neigh_destroy_rcu(struct rcu_head *head)
681 {
682 	struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683 
684 	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685 }
686 /*
687  *	The neighbour must already be out of the table
688  *	(i.e. neigh->dead must be set).
689  */
690 void neigh_destroy(struct neighbour *neigh)
691 {
692 	struct hh_cache *hh;
693 
694 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
695 
696 	if (!neigh->dead) {
697 		printk(KERN_WARNING
698 		       "Destroying alive neighbour %p\n", neigh);
699 		dump_stack();
700 		return;
701 	}
702 
703 	if (neigh_del_timer(neigh))
704 		printk(KERN_WARNING "Impossible event.\n");
705 
706 	while ((hh = neigh->hh) != NULL) {
707 		neigh->hh = hh->hh_next;
708 		hh->hh_next = NULL;
709 
710 		write_seqlock_bh(&hh->hh_lock);
711 		hh->hh_output = neigh_blackhole;
712 		write_sequnlock_bh(&hh->hh_lock);
713 		hh_cache_put(hh);
714 	}
715 
716 	skb_queue_purge(&neigh->arp_queue);
717 
718 	dev_put(neigh->dev);
719 	neigh_parms_put(neigh->parms);
720 
721 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
722 
723 	atomic_dec(&neigh->tbl->entries);
724 	call_rcu(&neigh->rcu, neigh_destroy_rcu);
725 }
726 EXPORT_SYMBOL(neigh_destroy);
727 
728 /* Neighbour state is suspicious;
729    disable the fast path.
730 
731    Called with neigh->lock held for writing.
732  */
733 static void neigh_suspect(struct neighbour *neigh)
734 {
735 	struct hh_cache *hh;
736 
737 	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
738 
739 	neigh->output = neigh->ops->output;
740 
741 	for (hh = neigh->hh; hh; hh = hh->hh_next)
742 		hh->hh_output = neigh->ops->output;
743 }
744 
745 /* Neighbour state is OK;
746    enable the fast path.
747 
748    Called with neigh->lock held for writing.
749  */
750 static void neigh_connect(struct neighbour *neigh)
751 {
752 	struct hh_cache *hh;
753 
754 	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
755 
756 	neigh->output = neigh->ops->connected_output;
757 
758 	for (hh = neigh->hh; hh; hh = hh->hh_next)
759 		hh->hh_output = neigh->ops->hh_output;
760 }
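
/* neigh_suspect()/neigh_connect() flip between the slow, verifying output
 * path (ops->output) and the fast one (ops->connected_output and the
 * cached-header ops->hh_output): dropping to NUD_STALE or NUD_DELAY
 * suspects the entry, while a confirmation that reaches NUD_REACHABLE
 * reconnects it.
 */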
761 
762 static void neigh_periodic_work(struct work_struct *work)
763 {
764 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
765 	struct neighbour *n;
766 	struct neighbour __rcu **np;
767 	unsigned int i;
768 	struct neigh_hash_table *nht;
769 
770 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
771 
772 	write_lock_bh(&tbl->lock);
773 	nht = rcu_dereference_protected(tbl->nht,
774 					lockdep_is_held(&tbl->lock));
775 
776 	/*
777 	 *	periodically recompute ReachableTime from random function
778 	 */
779 
780 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
781 		struct neigh_parms *p;
782 		tbl->last_rand = jiffies;
783 		for (p = &tbl->parms; p; p = p->next)
784 			p->reachable_time =
785 				neigh_rand_reach_time(p->base_reachable_time);
786 	}
787 
788 	for (i = 0 ; i <= nht->hash_mask; i++) {
789 		np = &nht->hash_buckets[i];
790 
791 		while ((n = rcu_dereference_protected(*np,
792 				lockdep_is_held(&tbl->lock))) != NULL) {
793 			unsigned int state;
794 
795 			write_lock(&n->lock);
796 
797 			state = n->nud_state;
798 			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
799 				write_unlock(&n->lock);
800 				goto next_elt;
801 			}
802 
803 			if (time_before(n->used, n->confirmed))
804 				n->used = n->confirmed;
805 
806 			if (atomic_read(&n->refcnt) == 1 &&
807 			    (state == NUD_FAILED ||
808 			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
809 				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
810 				n->dead = 1;
811 				write_unlock(&n->lock);
812 				neigh_cleanup_and_release(n);
813 				continue;
814 			}
815 			write_unlock(&n->lock);
816 
817 next_elt:
818 			np = &n->next;
819 		}
820 		/*
821 		 * It's fine to release lock here, even if hash table
822 		 * grows while we are preempted.
823 		 */
824 		write_unlock_bh(&tbl->lock);
825 		cond_resched();
826 		write_lock_bh(&tbl->lock);
827 	}
828 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
829 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
830 	 * base_reachable_time.
831 	 */
832 	schedule_delayed_work(&tbl->gc_work,
833 			      tbl->parms.base_reachable_time >> 1);
834 	write_unlock_bh(&tbl->lock);
835 }
836 
837 static __inline__ int neigh_max_probes(struct neighbour *n)
838 {
839 	struct neigh_parms *p = n->parms;
840 	return (n->nud_state & NUD_PROBE) ?
841 		p->ucast_probes :
842 		p->ucast_probes + p->app_probes + p->mcast_probes;
843 }
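
/* Example, assuming the usual ARP defaults (ucast_probes = 3,
 * app_probes = 0, mcast_probes = 3): an entry in NUD_PROBE gives up after
 * 3 unicast probes, while NUD_INCOMPLETE resolution may send up to 6.
 */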
844 
845 static void neigh_invalidate(struct neighbour *neigh)
846 	__releases(neigh->lock)
847 	__acquires(neigh->lock)
848 {
849 	struct sk_buff *skb;
850 
851 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
852 	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
853 	neigh->updated = jiffies;
854 
855 	/* This is a very delicate spot. The error_report() callback is a
856 	   complicated routine; in particular, it can hit this same entry!
857 
858 	   So we drop the lock around it, staying accurate and avoiding an endless loop. --ANK
859 	 */
860 	while (neigh->nud_state == NUD_FAILED &&
861 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
862 		write_unlock(&neigh->lock);
863 		neigh->ops->error_report(neigh, skb);
864 		write_lock(&neigh->lock);
865 	}
866 	skb_queue_purge(&neigh->arp_queue);
867 }
868 
869 /* Called when a timer expires for a neighbour entry. */
870 
871 static void neigh_timer_handler(unsigned long arg)
872 {
873 	unsigned long now, next;
874 	struct neighbour *neigh = (struct neighbour *)arg;
875 	unsigned state;
876 	int notify = 0;
877 
878 	write_lock(&neigh->lock);
879 
880 	state = neigh->nud_state;
881 	now = jiffies;
882 	next = now + HZ;
883 
884 	if (!(state & NUD_IN_TIMER)) {
885 #ifndef CONFIG_SMP
886 		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
887 #endif
888 		goto out;
889 	}
890 
891 	if (state & NUD_REACHABLE) {
892 		if (time_before_eq(now,
893 				   neigh->confirmed + neigh->parms->reachable_time)) {
894 			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
895 			next = neigh->confirmed + neigh->parms->reachable_time;
896 		} else if (time_before_eq(now,
897 					  neigh->used + neigh->parms->delay_probe_time)) {
898 			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
899 			neigh->nud_state = NUD_DELAY;
900 			neigh->updated = jiffies;
901 			neigh_suspect(neigh);
902 			next = now + neigh->parms->delay_probe_time;
903 		} else {
904 			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
905 			neigh->nud_state = NUD_STALE;
906 			neigh->updated = jiffies;
907 			neigh_suspect(neigh);
908 			notify = 1;
909 		}
910 	} else if (state & NUD_DELAY) {
911 		if (time_before_eq(now,
912 				   neigh->confirmed + neigh->parms->delay_probe_time)) {
913 			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
914 			neigh->nud_state = NUD_REACHABLE;
915 			neigh->updated = jiffies;
916 			neigh_connect(neigh);
917 			notify = 1;
918 			next = neigh->confirmed + neigh->parms->reachable_time;
919 		} else {
920 			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
921 			neigh->nud_state = NUD_PROBE;
922 			neigh->updated = jiffies;
923 			atomic_set(&neigh->probes, 0);
924 			next = now + neigh->parms->retrans_time;
925 		}
926 	} else {
927 		/* NUD_PROBE|NUD_INCOMPLETE */
928 		next = now + neigh->parms->retrans_time;
929 	}
930 
931 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
932 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
933 		neigh->nud_state = NUD_FAILED;
934 		notify = 1;
935 		neigh_invalidate(neigh);
936 	}
937 
938 	if (neigh->nud_state & NUD_IN_TIMER) {
939 		if (time_before(next, jiffies + HZ/2))
940 			next = jiffies + HZ/2;
941 		if (!mod_timer(&neigh->timer, next))
942 			neigh_hold(neigh);
943 	}
944 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
945 		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
946 		/* keep skb alive even if arp_queue overflows */
947 		if (skb)
948 			skb = skb_copy(skb, GFP_ATOMIC);
949 		write_unlock(&neigh->lock);
950 		neigh->ops->solicit(neigh, skb);
951 		atomic_inc(&neigh->probes);
952 		kfree_skb(skb);
953 	} else {
954 out:
955 		write_unlock(&neigh->lock);
956 	}
957 
958 	if (notify)
959 		neigh_update_notify(neigh);
960 
961 	neigh_release(neigh);
962 }
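
/* Summary of the timer-driven transitions above:
 *	NUD_REACHABLE -> NUD_DELAY	(recently used, confirmation aged out)
 *	NUD_REACHABLE -> NUD_STALE	(idle, confirmation aged out)
 *	NUD_DELAY     -> NUD_REACHABLE	(confirmed in the meantime)
 *	NUD_DELAY     -> NUD_PROBE	(no confirmation; start unicast probes)
 *	NUD_PROBE / NUD_INCOMPLETE -> NUD_FAILED  (neigh_max_probes() exceeded)
 */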
963 
964 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
965 {
966 	int rc;
967 	unsigned long now;
968 
969 	write_lock_bh(&neigh->lock);
970 
971 	rc = 0;
972 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
973 		goto out_unlock_bh;
974 
975 	now = jiffies;
976 
977 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
978 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
979 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
980 			neigh->nud_state     = NUD_INCOMPLETE;
981 			neigh->updated = jiffies;
982 			neigh_add_timer(neigh, now + 1);
983 		} else {
984 			neigh->nud_state = NUD_FAILED;
985 			neigh->updated = jiffies;
986 			write_unlock_bh(&neigh->lock);
987 
988 			kfree_skb(skb);
989 			return 1;
990 		}
991 	} else if (neigh->nud_state & NUD_STALE) {
992 		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
993 		neigh->nud_state = NUD_DELAY;
994 		neigh->updated = jiffies;
995 		neigh_add_timer(neigh,
996 				jiffies + neigh->parms->delay_probe_time);
997 	}
998 
999 	if (neigh->nud_state == NUD_INCOMPLETE) {
1000 		if (skb) {
1001 			if (skb_queue_len(&neigh->arp_queue) >=
1002 			    neigh->parms->queue_len) {
1003 				struct sk_buff *buff;
1004 				buff = __skb_dequeue(&neigh->arp_queue);
1005 				kfree_skb(buff);
1006 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1007 			}
1008 			skb_dst_force(skb);
1009 			__skb_queue_tail(&neigh->arp_queue, skb);
1010 		}
1011 		rc = 1;
1012 	}
1013 out_unlock_bh:
1014 	write_unlock_bh(&neigh->lock);
1015 	return rc;
1016 }
1017 EXPORT_SYMBOL(__neigh_event_send);
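
/* __neigh_event_send() returns 0 when the entry is usable as-is, and 1
 * when transmission must wait: the skb, if any, was queued for resolution
 * (NUD_INCOMPLETE) or dropped (NUD_FAILED). Most callers go through the
 * neigh_event_send() wrapper, as neigh_resolve_output() does below.
 */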
1018 
1019 static void neigh_update_hhs(const struct neighbour *neigh)
1020 {
1021 	struct hh_cache *hh;
1022 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1023 		= NULL;
1024 
1025 	if (neigh->dev->header_ops)
1026 		update = neigh->dev->header_ops->cache_update;
1027 
1028 	if (update) {
1029 		for (hh = neigh->hh; hh; hh = hh->hh_next) {
1030 			write_seqlock_bh(&hh->hh_lock);
1031 			update(hh, neigh->dev, neigh->ha);
1032 			write_sequnlock_bh(&hh->hh_lock);
1033 		}
1034 	}
1035 }
1036 
1037 
1038 
1039 /* Generic update routine.
1040    -- lladdr is the new lladdr, or NULL if it is not supplied.
1041    -- new    is the new state.
1042    -- flags
1043 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
1044 				if it is different.
1045 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
1046 				lladdr instead of overriding it
1047 				if it is different.
1048 				It also allows retaining the current state
1049 				if the lladdr is unchanged.
1050 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1051 
1052 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1053 				NTF_ROUTER flag.
1054 	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
1055 				to be a router.
1056 
1057    The caller MUST hold a reference count on the entry.
1058  */
1059 
1060 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1061 		 u32 flags)
1062 {
1063 	u8 old;
1064 	int err;
1065 	int notify = 0;
1066 	struct net_device *dev;
1067 	int update_isrouter = 0;
1068 
1069 	write_lock_bh(&neigh->lock);
1070 
1071 	dev    = neigh->dev;
1072 	old    = neigh->nud_state;
1073 	err    = -EPERM;
1074 
1075 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1076 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1077 		goto out;
1078 
1079 	if (!(new & NUD_VALID)) {
1080 		neigh_del_timer(neigh);
1081 		if (old & NUD_CONNECTED)
1082 			neigh_suspect(neigh);
1083 		neigh->nud_state = new;
1084 		err = 0;
1085 		notify = old & NUD_VALID;
1086 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1087 		    (new & NUD_FAILED)) {
1088 			neigh_invalidate(neigh);
1089 			notify = 1;
1090 		}
1091 		goto out;
1092 	}
1093 
1094 	/* Compare new lladdr with cached one */
1095 	if (!dev->addr_len) {
1096 		/* First case: device needs no address. */
1097 		lladdr = neigh->ha;
1098 	} else if (lladdr) {
1099 		/* The second case: something is already cached
1100 		   and a new address is proposed:
1101 		   - compare the new and old addresses
1102 		   - if they differ, check the override flag
1103 		 */
1104 		if ((old & NUD_VALID) &&
1105 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1106 			lladdr = neigh->ha;
1107 	} else {
1108 		/* No address is supplied; if we know something,
1109 		   use it, otherwise discard the request.
1110 		 */
1111 		err = -EINVAL;
1112 		if (!(old & NUD_VALID))
1113 			goto out;
1114 		lladdr = neigh->ha;
1115 	}
1116 
1117 	if (new & NUD_CONNECTED)
1118 		neigh->confirmed = jiffies;
1119 	neigh->updated = jiffies;
1120 
1121 	/* If entry was valid and address is not changed,
1122 	   do not change entry state, if new one is STALE.
1123 	 */
1124 	err = 0;
1125 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1126 	if (old & NUD_VALID) {
1127 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1128 			update_isrouter = 0;
1129 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1130 			    (old & NUD_CONNECTED)) {
1131 				lladdr = neigh->ha;
1132 				new = NUD_STALE;
1133 			} else
1134 				goto out;
1135 		} else {
1136 			if (lladdr == neigh->ha && new == NUD_STALE &&
1137 			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1138 			     (old & NUD_CONNECTED))
1139 			    )
1140 				new = old;
1141 		}
1142 	}
1143 
1144 	if (new != old) {
1145 		neigh_del_timer(neigh);
1146 		if (new & NUD_IN_TIMER)
1147 			neigh_add_timer(neigh, (jiffies +
1148 						((new & NUD_REACHABLE) ?
1149 						 neigh->parms->reachable_time :
1150 						 0)));
1151 		neigh->nud_state = new;
1152 	}
1153 
1154 	if (lladdr != neigh->ha) {
1155 		write_seqlock(&neigh->ha_lock);
1156 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1157 		write_sequnlock(&neigh->ha_lock);
1158 		neigh_update_hhs(neigh);
1159 		if (!(new & NUD_CONNECTED))
1160 			neigh->confirmed = jiffies -
1161 				      (neigh->parms->base_reachable_time << 1);
1162 		notify = 1;
1163 	}
1164 	if (new == old)
1165 		goto out;
1166 	if (new & NUD_CONNECTED)
1167 		neigh_connect(neigh);
1168 	else
1169 		neigh_suspect(neigh);
1170 	if (!(old & NUD_VALID)) {
1171 		struct sk_buff *skb;
1172 
1173 		/* Again: avoid an endless loop if something went wrong */
1174 
1175 		while (neigh->nud_state & NUD_VALID &&
1176 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1177 			struct neighbour *n1 = neigh;
1178 			write_unlock_bh(&neigh->lock);
1179 			/* On shaper/eql skb->dst->neighbour != neigh :( */
1180 			if (skb_dst(skb) && skb_dst(skb)->neighbour)
1181 				n1 = skb_dst(skb)->neighbour;
1182 			n1->output(skb);
1183 			write_lock_bh(&neigh->lock);
1184 		}
1185 		skb_queue_purge(&neigh->arp_queue);
1186 	}
1187 out:
1188 	if (update_isrouter) {
1189 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1190 			(neigh->flags | NTF_ROUTER) :
1191 			(neigh->flags & ~NTF_ROUTER);
1192 	}
1193 	write_unlock_bh(&neigh->lock);
1194 
1195 	if (notify)
1196 		neigh_update_notify(neigh);
1197 
1198 	return err;
1199 }
1200 EXPORT_SYMBOL(neigh_update);
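
/* Illustrative use (cf. neigh_event_ns() just below): confirming a
 * neighbour from a received packet's source address typically looks like
 *
 *	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE);
 *
 * i.e. record the address but let the next transmission re-validate it.
 */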
1201 
1202 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1203 				 u8 *lladdr, void *saddr,
1204 				 struct net_device *dev)
1205 {
1206 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1207 						 lladdr || !dev->addr_len);
1208 	if (neigh)
1209 		neigh_update(neigh, lladdr, NUD_STALE,
1210 			     NEIGH_UPDATE_F_OVERRIDE);
1211 	return neigh;
1212 }
1213 EXPORT_SYMBOL(neigh_event_ns);
1214 
1215 static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
1216 				   __be16 protocol)
1217 {
1218 	struct hh_cache *hh;
1219 
1220 	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
1221 	for (hh = n->hh; hh; hh = hh->hh_next) {
1222 		if (hh->hh_type == protocol) {
1223 			atomic_inc(&hh->hh_refcnt);
1224 			if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1225 				hh_cache_put(hh);
1226 			return true;
1227 		}
1228 	}
1229 	return false;
1230 }
1231 
1232 /* called with read_lock_bh(&n->lock); */
1233 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1234 			  __be16 protocol)
1235 {
1236 	struct hh_cache	*hh;
1237 	struct net_device *dev = dst->dev;
1238 
1239 	if (likely(neigh_hh_lookup(n, dst, protocol)))
1240 		return;
1241 
1242 	/* slow path */
1243 	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
1244 	if (!hh)
1245 		return;
1246 
1247 	seqlock_init(&hh->hh_lock);
1248 	hh->hh_type = protocol;
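	/* Two references up front: one for the n->hh chain, one for dst->hh.
	 * The failure paths below drop whichever ends up unused. */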
1249 	atomic_set(&hh->hh_refcnt, 2);
1250 
1251 	if (dev->header_ops->cache(n, hh)) {
1252 		kfree(hh);
1253 		return;
1254 	}
1255 
1256 	write_lock_bh(&n->lock);
1257 
1258 	/* must check if another thread already did the insert */
1259 	if (neigh_hh_lookup(n, dst, protocol)) {
1260 		kfree(hh);
1261 		goto end;
1262 	}
1263 
1264 	if (n->nud_state & NUD_CONNECTED)
1265 		hh->hh_output = n->ops->hh_output;
1266 	else
1267 		hh->hh_output = n->ops->output;
1268 
1269 	hh->hh_next = n->hh;
1270 	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
1271 	n->hh	    = hh;
1272 
1273 	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1274 		hh_cache_put(hh);
1275 end:
1276 	write_unlock_bh(&n->lock);
1277 }
1278 
1279 /* This function can be used in contexts where only the old dev_queue_xmit()
1280  * worked, e.g. if you want to override the normal output path (eql, shaper),
1281  * but resolution has not been made yet.
1282  */
1283 
1284 int neigh_compat_output(struct sk_buff *skb)
1285 {
1286 	struct net_device *dev = skb->dev;
1287 
1288 	__skb_pull(skb, skb_network_offset(skb));
1289 
1290 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1291 			    skb->len) < 0 &&
1292 	    dev->header_ops->rebuild(skb))
1293 		return 0;
1294 
1295 	return dev_queue_xmit(skb);
1296 }
1297 EXPORT_SYMBOL(neigh_compat_output);
1298 
1299 /* Slow and careful. */
1300 
1301 int neigh_resolve_output(struct sk_buff *skb)
1302 {
1303 	struct dst_entry *dst = skb_dst(skb);
1304 	struct neighbour *neigh;
1305 	int rc = 0;
1306 
1307 	if (!dst || !(neigh = dst->neighbour))
1308 		goto discard;
1309 
1310 	__skb_pull(skb, skb_network_offset(skb));
1311 
1312 	if (!neigh_event_send(neigh, skb)) {
1313 		int err;
1314 		struct net_device *dev = neigh->dev;
1315 		unsigned int seq;
1316 
1317 		if (dev->header_ops->cache &&
1318 		    !dst->hh &&
1319 		    !(dst->flags & DST_NOCACHE))
1320 			neigh_hh_init(neigh, dst, dst->ops->protocol);
1321 
1322 		do {
1323 			seq = read_seqbegin(&neigh->ha_lock);
1324 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1325 					      neigh->ha, NULL, skb->len);
1326 		} while (read_seqretry(&neigh->ha_lock, seq));
1327 
1328 		if (err >= 0)
1329 			rc = neigh->ops->queue_xmit(skb);
1330 		else
1331 			goto out_kfree_skb;
1332 	}
1333 out:
1334 	return rc;
1335 discard:
1336 	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1337 		      dst, dst ? dst->neighbour : NULL);
1338 out_kfree_skb:
1339 	rc = -EINVAL;
1340 	kfree_skb(skb);
1341 	goto out;
1342 }
1343 EXPORT_SYMBOL(neigh_resolve_output);
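
/* The read_seqbegin()/read_seqretry() loop above (repeated in
 * neigh_connected_output() below) re-runs dev_hard_header() if
 * neigh_update() rewrote neigh->ha concurrently, so a frame never leaves
 * with a torn link-layer address.
 */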
1344 
1345 /* As fast as possible without hh cache */
1346 
1347 int neigh_connected_output(struct sk_buff *skb)
1348 {
1349 	int err;
1350 	struct dst_entry *dst = skb_dst(skb);
1351 	struct neighbour *neigh = dst->neighbour;
1352 	struct net_device *dev = neigh->dev;
1353 	unsigned int seq;
1354 
1355 	__skb_pull(skb, skb_network_offset(skb));
1356 
1357 	do {
1358 		seq = read_seqbegin(&neigh->ha_lock);
1359 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1360 				      neigh->ha, NULL, skb->len);
1361 	} while (read_seqretry(&neigh->ha_lock, seq));
1362 
1363 	if (err >= 0)
1364 		err = neigh->ops->queue_xmit(skb);
1365 	else {
1366 		err = -EINVAL;
1367 		kfree_skb(skb);
1368 	}
1369 	return err;
1370 }
1371 EXPORT_SYMBOL(neigh_connected_output);
1372 
1373 static void neigh_proxy_process(unsigned long arg)
1374 {
1375 	struct neigh_table *tbl = (struct neigh_table *)arg;
1376 	long sched_next = 0;
1377 	unsigned long now = jiffies;
1378 	struct sk_buff *skb, *n;
1379 
1380 	spin_lock(&tbl->proxy_queue.lock);
1381 
1382 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1383 		long tdif = NEIGH_CB(skb)->sched_next - now;
1384 
1385 		if (tdif <= 0) {
1386 			struct net_device *dev = skb->dev;
1387 			__skb_unlink(skb, &tbl->proxy_queue);
1388 			if (tbl->proxy_redo && netif_running(dev))
1389 				tbl->proxy_redo(skb);
1390 			else
1391 				kfree_skb(skb);
1392 
1393 			dev_put(dev);
1394 		} else if (!sched_next || tdif < sched_next)
1395 			sched_next = tdif;
1396 	}
1397 	del_timer(&tbl->proxy_timer);
1398 	if (sched_next)
1399 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1400 	spin_unlock(&tbl->proxy_queue.lock);
1401 }
1402 
1403 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1404 		    struct sk_buff *skb)
1405 {
1406 	unsigned long now = jiffies;
1407 	unsigned long sched_next = now + (net_random() % p->proxy_delay);
1408 
1409 	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1410 		kfree_skb(skb);
1411 		return;
1412 	}
1413 
1414 	NEIGH_CB(skb)->sched_next = sched_next;
1415 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1416 
1417 	spin_lock(&tbl->proxy_queue.lock);
1418 	if (del_timer(&tbl->proxy_timer)) {
1419 		if (time_before(tbl->proxy_timer.expires, sched_next))
1420 			sched_next = tbl->proxy_timer.expires;
1421 	}
1422 	skb_dst_drop(skb);
1423 	dev_hold(skb->dev);
1424 	__skb_queue_tail(&tbl->proxy_queue, skb);
1425 	mod_timer(&tbl->proxy_timer, sched_next);
1426 	spin_unlock(&tbl->proxy_queue.lock);
1427 }
1428 EXPORT_SYMBOL(pneigh_enqueue);
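
/* pneigh_enqueue() delays a proxied request by a random 0..proxy_delay
 * jiffies; neigh_proxy_process() above replays it via tbl->proxy_redo once
 * the deadline passes (or drops it if the device went down).
 */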
1429 
1430 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1431 						      struct net *net, int ifindex)
1432 {
1433 	struct neigh_parms *p;
1434 
1435 	for (p = &tbl->parms; p; p = p->next) {
1436 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1437 		    (!p->dev && !ifindex))
1438 			return p;
1439 	}
1440 
1441 	return NULL;
1442 }
1443 
1444 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1445 				      struct neigh_table *tbl)
1446 {
1447 	struct neigh_parms *p, *ref;
1448 	struct net *net = dev_net(dev);
1449 	const struct net_device_ops *ops = dev->netdev_ops;
1450 
1451 	ref = lookup_neigh_parms(tbl, net, 0);
1452 	if (!ref)
1453 		return NULL;
1454 
1455 	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1456 	if (p) {
1457 		p->tbl		  = tbl;
1458 		atomic_set(&p->refcnt, 1);
1459 		p->reachable_time =
1460 				neigh_rand_reach_time(p->base_reachable_time);
1461 
1462 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1463 			kfree(p);
1464 			return NULL;
1465 		}
1466 
1467 		dev_hold(dev);
1468 		p->dev = dev;
1469 		write_pnet(&p->net, hold_net(net));
1470 		p->sysctl_table = NULL;
1471 		write_lock_bh(&tbl->lock);
1472 		p->next		= tbl->parms.next;
1473 		tbl->parms.next = p;
1474 		write_unlock_bh(&tbl->lock);
1475 	}
1476 	return p;
1477 }
1478 EXPORT_SYMBOL(neigh_parms_alloc);
1479 
1480 static void neigh_rcu_free_parms(struct rcu_head *head)
1481 {
1482 	struct neigh_parms *parms =
1483 		container_of(head, struct neigh_parms, rcu_head);
1484 
1485 	neigh_parms_put(parms);
1486 }
1487 
1488 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1489 {
1490 	struct neigh_parms **p;
1491 
1492 	if (!parms || parms == &tbl->parms)
1493 		return;
1494 	write_lock_bh(&tbl->lock);
1495 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1496 		if (*p == parms) {
1497 			*p = parms->next;
1498 			parms->dead = 1;
1499 			write_unlock_bh(&tbl->lock);
1500 			if (parms->dev)
1501 				dev_put(parms->dev);
1502 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1503 			return;
1504 		}
1505 	}
1506 	write_unlock_bh(&tbl->lock);
1507 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
1508 }
1509 EXPORT_SYMBOL(neigh_parms_release);
1510 
1511 static void neigh_parms_destroy(struct neigh_parms *parms)
1512 {
1513 	release_net(neigh_parms_net(parms));
1514 	kfree(parms);
1515 }
1516 
1517 static struct lock_class_key neigh_table_proxy_queue_class;
1518 
1519 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1520 {
1521 	unsigned long now = jiffies;
1522 	unsigned long phsize;
1523 
1524 	write_pnet(&tbl->parms.net, &init_net);
1525 	atomic_set(&tbl->parms.refcnt, 1);
1526 	tbl->parms.reachable_time =
1527 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
1528 
1529 	if (!tbl->kmem_cachep)
1530 		tbl->kmem_cachep =
1531 			kmem_cache_create(tbl->id, tbl->entry_size, 0,
1532 					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1533 					  NULL);
1534 	tbl->stats = alloc_percpu(struct neigh_statistics);
1535 	if (!tbl->stats)
1536 		panic("cannot create neighbour cache statistics");
1537 
1538 #ifdef CONFIG_PROC_FS
1539 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1540 			      &neigh_stat_seq_fops, tbl))
1541 		panic("cannot create neighbour proc dir entry");
1542 #endif
1543 
1544 	tbl->nht = neigh_hash_alloc(8);
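	/* Start small: 8 buckets. neigh_create() doubles the table via
	 * neigh_hash_grow() whenever entries outnumber buckets. */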
1545 
1546 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1547 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1548 
1549 	if (!tbl->nht || !tbl->phash_buckets)
1550 		panic("cannot allocate neighbour cache hashes");
1551 
1552 	rwlock_init(&tbl->lock);
1553 	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1554 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1555 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1556 	skb_queue_head_init_class(&tbl->proxy_queue,
1557 			&neigh_table_proxy_queue_class);
1558 
1559 	tbl->last_flush = now;
1560 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1561 }
1562 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1563 
1564 void neigh_table_init(struct neigh_table *tbl)
1565 {
1566 	struct neigh_table *tmp;
1567 
1568 	neigh_table_init_no_netlink(tbl);
1569 	write_lock(&neigh_tbl_lock);
1570 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1571 		if (tmp->family == tbl->family)
1572 			break;
1573 	}
1574 	tbl->next	= neigh_tables;
1575 	neigh_tables	= tbl;
1576 	write_unlock(&neigh_tbl_lock);
1577 
1578 	if (unlikely(tmp)) {
1579 		printk(KERN_ERR "NEIGH: Registering multiple tables for "
1580 		       "family %d\n", tbl->family);
1581 		dump_stack();
1582 	}
1583 }
1584 EXPORT_SYMBOL(neigh_table_init);
1585 
1586 int neigh_table_clear(struct neigh_table *tbl)
1587 {
1588 	struct neigh_table **tp;
1589 
1590 	/* This is not clean... Fix it so the IPv6 module can unload safely */
1591 	cancel_delayed_work_sync(&tbl->gc_work);
1592 	del_timer_sync(&tbl->proxy_timer);
1593 	pneigh_queue_purge(&tbl->proxy_queue);
1594 	neigh_ifdown(tbl, NULL);
1595 	if (atomic_read(&tbl->entries))
1596 		printk(KERN_CRIT "neighbour leakage\n");
1597 	write_lock(&neigh_tbl_lock);
1598 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1599 		if (*tp == tbl) {
1600 			*tp = tbl->next;
1601 			break;
1602 		}
1603 	}
1604 	write_unlock(&neigh_tbl_lock);
1605 
1606 	call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
1607 	tbl->nht = NULL;
1608 
1609 	kfree(tbl->phash_buckets);
1610 	tbl->phash_buckets = NULL;
1611 
1612 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1613 
1614 	free_percpu(tbl->stats);
1615 	tbl->stats = NULL;
1616 
1617 	kmem_cache_destroy(tbl->kmem_cachep);
1618 	tbl->kmem_cachep = NULL;
1619 
1620 	return 0;
1621 }
1622 EXPORT_SYMBOL(neigh_table_clear);
1623 
1624 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1625 {
1626 	struct net *net = sock_net(skb->sk);
1627 	struct ndmsg *ndm;
1628 	struct nlattr *dst_attr;
1629 	struct neigh_table *tbl;
1630 	struct net_device *dev = NULL;
1631 	int err = -EINVAL;
1632 
1633 	ASSERT_RTNL();
1634 	if (nlmsg_len(nlh) < sizeof(*ndm))
1635 		goto out;
1636 
1637 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1638 	if (dst_attr == NULL)
1639 		goto out;
1640 
1641 	ndm = nlmsg_data(nlh);
1642 	if (ndm->ndm_ifindex) {
1643 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1644 		if (dev == NULL) {
1645 			err = -ENODEV;
1646 			goto out;
1647 		}
1648 	}
1649 
1650 	read_lock(&neigh_tbl_lock);
1651 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1652 		struct neighbour *neigh;
1653 
1654 		if (tbl->family != ndm->ndm_family)
1655 			continue;
1656 		read_unlock(&neigh_tbl_lock);
1657 
1658 		if (nla_len(dst_attr) < tbl->key_len)
1659 			goto out;
1660 
1661 		if (ndm->ndm_flags & NTF_PROXY) {
1662 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1663 			goto out;
1664 		}
1665 
1666 		if (dev == NULL)
1667 			goto out;
1668 
1669 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1670 		if (neigh == NULL) {
1671 			err = -ENOENT;
1672 			goto out;
1673 		}
1674 
1675 		err = neigh_update(neigh, NULL, NUD_FAILED,
1676 				   NEIGH_UPDATE_F_OVERRIDE |
1677 				   NEIGH_UPDATE_F_ADMIN);
1678 		neigh_release(neigh);
1679 		goto out;
1680 	}
1681 	read_unlock(&neigh_tbl_lock);
1682 	err = -EAFNOSUPPORT;
1683 
1684 out:
1685 	return err;
1686 }
1687 
1688 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1689 {
1690 	struct net *net = sock_net(skb->sk);
1691 	struct ndmsg *ndm;
1692 	struct nlattr *tb[NDA_MAX+1];
1693 	struct neigh_table *tbl;
1694 	struct net_device *dev = NULL;
1695 	int err;
1696 
1697 	ASSERT_RTNL();
1698 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1699 	if (err < 0)
1700 		goto out;
1701 
1702 	err = -EINVAL;
1703 	if (tb[NDA_DST] == NULL)
1704 		goto out;
1705 
1706 	ndm = nlmsg_data(nlh);
1707 	if (ndm->ndm_ifindex) {
1708 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1709 		if (dev == NULL) {
1710 			err = -ENODEV;
1711 			goto out;
1712 		}
1713 
1714 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1715 			goto out;
1716 	}
1717 
1718 	read_lock(&neigh_tbl_lock);
1719 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1720 		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1721 		struct neighbour *neigh;
1722 		void *dst, *lladdr;
1723 
1724 		if (tbl->family != ndm->ndm_family)
1725 			continue;
1726 		read_unlock(&neigh_tbl_lock);
1727 
1728 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1729 			goto out;
1730 		dst = nla_data(tb[NDA_DST]);
1731 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1732 
1733 		if (ndm->ndm_flags & NTF_PROXY) {
1734 			struct pneigh_entry *pn;
1735 
1736 			err = -ENOBUFS;
1737 			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1738 			if (pn) {
1739 				pn->flags = ndm->ndm_flags;
1740 				err = 0;
1741 			}
1742 			goto out;
1743 		}
1744 
1745 		if (dev == NULL)
1746 			goto out;
1747 
1748 		neigh = neigh_lookup(tbl, dst, dev);
1749 		if (neigh == NULL) {
1750 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1751 				err = -ENOENT;
1752 				goto out;
1753 			}
1754 
1755 			neigh = __neigh_lookup_errno(tbl, dst, dev);
1756 			if (IS_ERR(neigh)) {
1757 				err = PTR_ERR(neigh);
1758 				goto out;
1759 			}
1760 		} else {
1761 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1762 				err = -EEXIST;
1763 				neigh_release(neigh);
1764 				goto out;
1765 			}
1766 
1767 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1768 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1769 		}
1770 
1771 		if (ndm->ndm_flags & NTF_USE) {
1772 			neigh_event_send(neigh, NULL);
1773 			err = 0;
1774 		} else
1775 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1776 		neigh_release(neigh);
1777 		goto out;
1778 	}
1779 
1780 	read_unlock(&neigh_tbl_lock);
1781 	err = -EAFNOSUPPORT;
1782 out:
1783 	return err;
1784 }
1785 
1786 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1787 {
1788 	struct nlattr *nest;
1789 
1790 	nest = nla_nest_start(skb, NDTA_PARMS);
1791 	if (nest == NULL)
1792 		return -ENOBUFS;
1793 
1794 	if (parms->dev)
1795 		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1796 
1797 	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1798 	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1799 	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1800 	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1801 	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1802 	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1803 	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1804 	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1805 		      parms->base_reachable_time);
1806 	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1807 	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1808 	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1809 	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1810 	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1811 	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1812 
1813 	return nla_nest_end(skb, nest);
1814 
1815 nla_put_failure:
1816 	nla_nest_cancel(skb, nest);
1817 	return -EMSGSIZE;
1818 }
1819 
1820 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1821 			      u32 pid, u32 seq, int type, int flags)
1822 {
1823 	struct nlmsghdr *nlh;
1824 	struct ndtmsg *ndtmsg;
1825 
1826 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1827 	if (nlh == NULL)
1828 		return -EMSGSIZE;
1829 
1830 	ndtmsg = nlmsg_data(nlh);
1831 
1832 	read_lock_bh(&tbl->lock);
1833 	ndtmsg->ndtm_family = tbl->family;
1834 	ndtmsg->ndtm_pad1   = 0;
1835 	ndtmsg->ndtm_pad2   = 0;
1836 
1837 	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1838 	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1839 	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1840 	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1841 	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1842 
1843 	{
1844 		unsigned long now = jiffies;
1845 		unsigned int flush_delta = now - tbl->last_flush;
1846 		unsigned int rand_delta = now - tbl->last_rand;
1847 		struct neigh_hash_table *nht;
1848 		struct ndt_config ndc = {
1849 			.ndtc_key_len		= tbl->key_len,
1850 			.ndtc_entry_size	= tbl->entry_size,
1851 			.ndtc_entries		= atomic_read(&tbl->entries),
1852 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1853 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1854 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1855 		};
1856 
1857 		rcu_read_lock_bh();
1858 		nht = rcu_dereference_bh(tbl->nht);
1859 		ndc.ndtc_hash_rnd = nht->hash_rnd;
1860 		ndc.ndtc_hash_mask = nht->hash_mask;
1861 		rcu_read_unlock_bh();
1862 
1863 		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1864 	}
1865 
1866 	{
1867 		int cpu;
1868 		struct ndt_stats ndst;
1869 
1870 		memset(&ndst, 0, sizeof(ndst));
1871 
1872 		for_each_possible_cpu(cpu) {
1873 			struct neigh_statistics	*st;
1874 
1875 			st = per_cpu_ptr(tbl->stats, cpu);
1876 			ndst.ndts_allocs		+= st->allocs;
1877 			ndst.ndts_destroys		+= st->destroys;
1878 			ndst.ndts_hash_grows		+= st->hash_grows;
1879 			ndst.ndts_res_failed		+= st->res_failed;
1880 			ndst.ndts_lookups		+= st->lookups;
1881 			ndst.ndts_hits			+= st->hits;
1882 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1883 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1884 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1885 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1886 		}
1887 
1888 		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1889 	}
1890 
1891 	BUG_ON(tbl->parms.dev);
1892 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1893 		goto nla_put_failure;
1894 
1895 	read_unlock_bh(&tbl->lock);
1896 	return nlmsg_end(skb, nlh);
1897 
1898 nla_put_failure:
1899 	read_unlock_bh(&tbl->lock);
1900 	nlmsg_cancel(skb, nlh);
1901 	return -EMSGSIZE;
1902 }
1903 
1904 static int neightbl_fill_param_info(struct sk_buff *skb,
1905 				    struct neigh_table *tbl,
1906 				    struct neigh_parms *parms,
1907 				    u32 pid, u32 seq, int type,
1908 				    unsigned int flags)
1909 {
1910 	struct ndtmsg *ndtmsg;
1911 	struct nlmsghdr *nlh;
1912 
1913 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1914 	if (nlh == NULL)
1915 		return -EMSGSIZE;
1916 
1917 	ndtmsg = nlmsg_data(nlh);
1918 
1919 	read_lock_bh(&tbl->lock);
1920 	ndtmsg->ndtm_family = tbl->family;
1921 	ndtmsg->ndtm_pad1   = 0;
1922 	ndtmsg->ndtm_pad2   = 0;
1923 
1924 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1925 	    neightbl_fill_parms(skb, parms) < 0)
1926 		goto errout;
1927 
1928 	read_unlock_bh(&tbl->lock);
1929 	return nlmsg_end(skb, nlh);
1930 errout:
1931 	read_unlock_bh(&tbl->lock);
1932 	nlmsg_cancel(skb, nlh);
1933 	return -EMSGSIZE;
1934 }
1935 
1936 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1937 	[NDTA_NAME]		= { .type = NLA_STRING },
1938 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1939 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1940 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1941 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1942 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1943 };
1944 
1945 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1946 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1947 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1948 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1949 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1950 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1951 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1952 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1953 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1954 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1955 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1956 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1957 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1958 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1959 };
1960 
1961 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1962 {
1963 	struct net *net = sock_net(skb->sk);
1964 	struct neigh_table *tbl;
1965 	struct ndtmsg *ndtmsg;
1966 	struct nlattr *tb[NDTA_MAX+1];
1967 	int err;
1968 
1969 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1970 			  nl_neightbl_policy);
1971 	if (err < 0)
1972 		goto errout;
1973 
1974 	if (tb[NDTA_NAME] == NULL) {
1975 		err = -EINVAL;
1976 		goto errout;
1977 	}
1978 
1979 	ndtmsg = nlmsg_data(nlh);
1980 	read_lock(&neigh_tbl_lock);
1981 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1982 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1983 			continue;
1984 
1985 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1986 			break;
1987 	}
1988 
1989 	if (tbl == NULL) {
1990 		err = -ENOENT;
1991 		goto errout_locked;
1992 	}
1993 
1994 	/*
1995 	 * We acquire tbl->lock to be nice to the periodic timers and
1996 	 * make sure they always see a consistent set of values.
1997 	 */
1998 	write_lock_bh(&tbl->lock);
1999 
2000 	if (tb[NDTA_PARMS]) {
2001 		struct nlattr *tbp[NDTPA_MAX+1];
2002 		struct neigh_parms *p;
2003 		int i, ifindex = 0;
2004 
2005 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2006 				       nl_ntbl_parm_policy);
2007 		if (err < 0)
2008 			goto errout_tbl_lock;
2009 
2010 		if (tbp[NDTPA_IFINDEX])
2011 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2012 
2013 		p = lookup_neigh_parms(tbl, net, ifindex);
2014 		if (p == NULL) {
2015 			err = -ENOENT;
2016 			goto errout_tbl_lock;
2017 		}
2018 
2019 		for (i = 1; i <= NDTPA_MAX; i++) {
2020 			if (tbp[i] == NULL)
2021 				continue;
2022 
2023 			switch (i) {
2024 			case NDTPA_QUEUE_LEN:
2025 				p->queue_len = nla_get_u32(tbp[i]);
2026 				break;
2027 			case NDTPA_PROXY_QLEN:
2028 				p->proxy_qlen = nla_get_u32(tbp[i]);
2029 				break;
2030 			case NDTPA_APP_PROBES:
2031 				p->app_probes = nla_get_u32(tbp[i]);
2032 				break;
2033 			case NDTPA_UCAST_PROBES:
2034 				p->ucast_probes = nla_get_u32(tbp[i]);
2035 				break;
2036 			case NDTPA_MCAST_PROBES:
2037 				p->mcast_probes = nla_get_u32(tbp[i]);
2038 				break;
2039 			case NDTPA_BASE_REACHABLE_TIME:
2040 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2041 				break;
2042 			case NDTPA_GC_STALETIME:
2043 				p->gc_staletime = nla_get_msecs(tbp[i]);
2044 				break;
2045 			case NDTPA_DELAY_PROBE_TIME:
2046 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2047 				break;
2048 			case NDTPA_RETRANS_TIME:
2049 				p->retrans_time = nla_get_msecs(tbp[i]);
2050 				break;
2051 			case NDTPA_ANYCAST_DELAY:
2052 				p->anycast_delay = nla_get_msecs(tbp[i]);
2053 				break;
2054 			case NDTPA_PROXY_DELAY:
2055 				p->proxy_delay = nla_get_msecs(tbp[i]);
2056 				break;
2057 			case NDTPA_LOCKTIME:
2058 				p->locktime = nla_get_msecs(tbp[i]);
2059 				break;
2060 			}
2061 		}
2062 	}
2063 
2064 	if (tb[NDTA_THRESH1])
2065 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2066 
2067 	if (tb[NDTA_THRESH2])
2068 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2069 
2070 	if (tb[NDTA_THRESH3])
2071 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2072 
2073 	if (tb[NDTA_GC_INTERVAL])
2074 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2075 
2076 	err = 0;
2077 
2078 errout_tbl_lock:
2079 	write_unlock_bh(&tbl->lock);
2080 errout_locked:
2081 	read_unlock(&neigh_tbl_lock);
2082 errout:
2083 	return err;
2084 }
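
/*
 * Illustrative sketch (hypothetical, not called anywhere in this file):
 * building the attribute layout neightbl_set() accepts, with the same
 * nla_* helpers used elsewhere here.  The function name and the attribute
 * values are made up.
 *
 *	static int example_fill_set_request(struct sk_buff *skb)
 *	{
 *		struct nlattr *parms;
 *
 *		if (nla_put_string(skb, NDTA_NAME, "arp_cache") < 0 ||
 *		    nla_put_u32(skb, NDTA_THRESH1, 128) < 0)
 *			return -EMSGSIZE;
 *		parms = nla_nest_start(skb, NDTA_PARMS);
 *		if (parms == NULL)
 *			return -EMSGSIZE;
 *		if (nla_put_u32(skb, NDTPA_IFINDEX, 0) < 0 ||
 *		    nla_put_u32(skb, NDTPA_QUEUE_LEN, 3) < 0)
 *			return -EMSGSIZE;
 *		nla_nest_end(skb, parms);
 *		return 0;
 *	}
 */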
2085 
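/*
 * Dump every neighbour table, each followed by its per-device parameter
 * sets.  cb->args[0] and cb->args[1] record how far the previous pass got,
 * so a multi-part dump resumes where it stopped.
 */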
2086 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2087 {
2088 	struct net *net = sock_net(skb->sk);
2089 	int family, tidx, nidx = 0;
2090 	int tbl_skip = cb->args[0];
2091 	int neigh_skip = cb->args[1];
2092 	struct neigh_table *tbl;
2093 
2094 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2095 
2096 	read_lock(&neigh_tbl_lock);
2097 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2098 		struct neigh_parms *p;
2099 
2100 		if (tidx < tbl_skip || (family && tbl->family != family))
2101 			continue;
2102 
2103 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2104 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2105 				       NLM_F_MULTI) <= 0)
2106 			break;
2107 
2108 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2109 			if (!net_eq(neigh_parms_net(p), net))
2110 				continue;
2111 
2112 			if (nidx < neigh_skip)
2113 				goto next;
2114 
2115 			if (neightbl_fill_param_info(skb, tbl, p,
2116 						     NETLINK_CB(cb->skb).pid,
2117 						     cb->nlh->nlmsg_seq,
2118 						     RTM_NEWNEIGHTBL,
2119 						     NLM_F_MULTI) <= 0)
2120 				goto out;
2121 		next:
2122 			nidx++;
2123 		}
2124 
2125 		neigh_skip = 0;
2126 	}
2127 out:
2128 	read_unlock(&neigh_tbl_lock);
2129 	cb->args[0] = tidx;
2130 	cb->args[1] = nidx;
2131 
2132 	return skb->len;
2133 }
2134 
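/* Build one neighbour message (RTM_NEWNEIGH et al.) for a single entry. */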
2135 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2136 			   u32 pid, u32 seq, int type, unsigned int flags)
2137 {
2138 	unsigned long now = jiffies;
2139 	struct nda_cacheinfo ci;
2140 	struct nlmsghdr *nlh;
2141 	struct ndmsg *ndm;
2142 
2143 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2144 	if (nlh == NULL)
2145 		return -EMSGSIZE;
2146 
2147 	ndm = nlmsg_data(nlh);
2148 	ndm->ndm_family	 = neigh->ops->family;
2149 	ndm->ndm_pad1    = 0;
2150 	ndm->ndm_pad2    = 0;
2151 	ndm->ndm_flags	 = neigh->flags;
2152 	ndm->ndm_type	 = neigh->type;
2153 	ndm->ndm_ifindex = neigh->dev->ifindex;
2154 
2155 	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2156 
2157 	read_lock_bh(&neigh->lock);
2158 	ndm->ndm_state	 = neigh->nud_state;
2159 	if (neigh->nud_state & NUD_VALID) {
2160 		char haddr[MAX_ADDR_LEN];
2161 
2162 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164 			read_unlock_bh(&neigh->lock);
2165 			goto nla_put_failure;
2166 		}
2167 	}
2168 
2169 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2170 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2171 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2172 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2173 	read_unlock_bh(&neigh->lock);
2174 
2175 	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2176 	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2177 
2178 	return nlmsg_end(skb, nlh);
2179 
2180 nla_put_failure:
2181 	nlmsg_cancel(skb, nlh);
2182 	return -EMSGSIZE;
2183 }
2184 
2185 static void neigh_update_notify(struct neighbour *neigh)
2186 {
2187 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2188 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2189 }
2190 
2191 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2192 			    struct netlink_callback *cb)
2193 {
2194 	struct net *net = sock_net(skb->sk);
2195 	struct neighbour *n;
2196 	int rc, h, s_h = cb->args[1];
2197 	int idx, s_idx = idx = cb->args[2];
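	/* idx is pre-seeded from cb->args[2] so the resume cookie written
	 * back at "out:" stays valid even if every chain is skipped. */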
2198 	struct neigh_hash_table *nht;
2199 
2200 	rcu_read_lock_bh();
2201 	nht = rcu_dereference_bh(tbl->nht);
2202 
2203 	for (h = 0; h <= nht->hash_mask; h++) {
2204 		if (h < s_h)
2205 			continue;
2206 		if (h > s_h)
2207 			s_idx = 0;
2208 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2209 		     n != NULL;
2210 		     n = rcu_dereference_bh(n->next)) {
2211 			if (!net_eq(dev_net(n->dev), net))
2212 				continue;
2213 			if (idx < s_idx)
2214 				goto next;
2215 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2216 					    cb->nlh->nlmsg_seq,
2217 					    RTM_NEWNEIGH,
2218 					    NLM_F_MULTI) <= 0) {
2219 				rc = -1;
2220 				goto out;
2221 			}
2222 next:
2223 			idx++;
2224 		}
2225 	}
2226 	rc = skb->len;
2227 out:
2228 	rcu_read_unlock_bh();
2229 	cb->args[1] = h;
2230 	cb->args[2] = idx;
2231 	return rc;
2232 }
2233 
2234 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2235 {
2236 	struct neigh_table *tbl;
2237 	int t, family, s_t;
2238 
2239 	read_lock(&neigh_tbl_lock);
2240 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2241 	s_t = cb->args[0];
2242 
2243 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2244 		if (t < s_t || (family && tbl->family != family))
2245 			continue;
2246 		if (t > s_t)
2247 			memset(&cb->args[1], 0, sizeof(cb->args) -
2248 						sizeof(cb->args[0]));
2249 		if (neigh_dump_table(tbl, skb, cb) < 0)
2250 			break;
2251 	}
2252 	read_unlock(&neigh_tbl_lock);
2253 
2254 	cb->args[0] = t;
2255 	return skb->len;
2256 }
2257 
2258 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2259 {
2260 	int chain;
2261 	struct neigh_hash_table *nht;
2262 
2263 	rcu_read_lock_bh();
2264 	nht = rcu_dereference_bh(tbl->nht);
2265 
2266 	read_lock(&tbl->lock); /* avoid resizes */
2267 	for (chain = 0; chain <= nht->hash_mask; chain++) {
2268 		struct neighbour *n;
2269 
2270 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2271 		     n != NULL;
2272 		     n = rcu_dereference_bh(n->next))
2273 			cb(n, cookie);
2274 	}
2275 	read_unlock(&tbl->lock);
2276 	rcu_read_unlock_bh();
2277 }
2278 EXPORT_SYMBOL(neigh_for_each);
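
/*
 * Usage sketch (illustrative, hypothetical names; nothing in this file
 * calls it): counting the entries of one table.  The callback runs under
 * rcu_read_lock_bh() and read_lock(&tbl->lock), so it must not sleep or
 * re-take the table lock.
 *
 *	static void example_count_entry(struct neighbour *n, void *cookie)
 *	{
 *		(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&example_tbl, example_count_entry, &count);
 */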
2279 
2280 /* The tbl->lock must be held as a writer and BH disabled. */
2281 void __neigh_for_each_release(struct neigh_table *tbl,
2282 			      int (*cb)(struct neighbour *))
2283 {
2284 	int chain;
2285 	struct neigh_hash_table *nht;
2286 
2287 	nht = rcu_dereference_protected(tbl->nht,
2288 					lockdep_is_held(&tbl->lock));
2289 	for (chain = 0; chain <= nht->hash_mask; chain++) {
2290 		struct neighbour *n;
2291 		struct neighbour __rcu **np;
2292 
2293 		np = &nht->hash_buckets[chain];
2294 		while ((n = rcu_dereference_protected(*np,
2295 					lockdep_is_held(&tbl->lock))) != NULL) {
2296 			int release;
2297 
2298 			write_lock(&n->lock);
2299 			release = cb(n);
2300 			if (release) {
2301 				rcu_assign_pointer(*np,
2302 					rcu_dereference_protected(n->next,
2303 						lockdep_is_held(&tbl->lock)));
2304 				n->dead = 1;
2305 			} else
2306 				np = &n->next;
2307 			write_unlock(&n->lock);
2308 			if (release)
2309 				neigh_cleanup_and_release(n);
2310 		}
2311 	}
2312 }
2313 EXPORT_SYMBOL(__neigh_for_each_release);
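
/*
 * Usage sketch (illustrative, hypothetical names): dropping every entry
 * that matches a predicate.  A nonzero return from the callback unlinks
 * the entry and hands it to neigh_cleanup_and_release().
 *
 *	static int example_release_on_dev(struct neighbour *n)
 *	{
 *		return n->dev == example_dev;
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, example_release_on_dev);
 *	write_unlock_bh(&tbl->lock);
 */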
2314 
2315 #ifdef CONFIG_PROC_FS
2316 
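/*
 * seq_file iteration over the main hash: neigh_get_first()/neigh_get_next()
 * walk the RCU-protected buckets, honouring NEIGH_SEQ_SKIP_NOARP and an
 * optional per-protocol sub-iterator; the pneigh_* helpers below walk the
 * proxy hash in the same fashion.
 */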
2317 static struct neighbour *neigh_get_first(struct seq_file *seq)
2318 {
2319 	struct neigh_seq_state *state = seq->private;
2320 	struct net *net = seq_file_net(seq);
2321 	struct neigh_hash_table *nht = state->nht;
2322 	struct neighbour *n = NULL;
2323 	int bucket;
2324 
2325 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2326 	for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
2327 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2328 
2329 		while (n) {
2330 			if (!net_eq(dev_net(n->dev), net))
2331 				goto next;
2332 			if (state->neigh_sub_iter) {
2333 				loff_t fakep = 0;
2334 				void *v;
2335 
2336 				v = state->neigh_sub_iter(state, n, &fakep);
2337 				if (!v)
2338 					goto next;
2339 			}
2340 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2341 				break;
2342 			if (n->nud_state & ~NUD_NOARP)
2343 				break;
2344 next:
2345 			n = rcu_dereference_bh(n->next);
2346 		}
2347 
2348 		if (n)
2349 			break;
2350 	}
2351 	state->bucket = bucket;
2352 
2353 	return n;
2354 }
2355 
2356 static struct neighbour *neigh_get_next(struct seq_file *seq,
2357 					struct neighbour *n,
2358 					loff_t *pos)
2359 {
2360 	struct neigh_seq_state *state = seq->private;
2361 	struct net *net = seq_file_net(seq);
2362 	struct neigh_hash_table *nht = state->nht;
2363 
2364 	if (state->neigh_sub_iter) {
2365 		void *v = state->neigh_sub_iter(state, n, pos);
2366 		if (v)
2367 			return n;
2368 	}
2369 	n = rcu_dereference_bh(n->next);
2370 
2371 	while (1) {
2372 		while (n) {
2373 			if (!net_eq(dev_net(n->dev), net))
2374 				goto next;
2375 			if (state->neigh_sub_iter) {
2376 				void *v = state->neigh_sub_iter(state, n, pos);
2377 				if (v)
2378 					return n;
2379 				goto next;
2380 			}
2381 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2382 				break;
2383 
2384 			if (n->nud_state & ~NUD_NOARP)
2385 				break;
2386 next:
2387 			n = rcu_dereference_bh(n->next);
2388 		}
2389 
2390 		if (n)
2391 			break;
2392 
2393 		if (++state->bucket > nht->hash_mask)
2394 			break;
2395 
2396 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2397 	}
2398 
2399 	if (n && pos)
2400 		--(*pos);
2401 	return n;
2402 }
2403 
2404 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2405 {
2406 	struct neighbour *n = neigh_get_first(seq);
2407 
2408 	if (n) {
2409 		--(*pos);
2410 		while (*pos) {
2411 			n = neigh_get_next(seq, n, pos);
2412 			if (!n)
2413 				break;
2414 		}
2415 	}
2416 	return *pos ? NULL : n;
2417 }
2418 
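/* Proxy ("pneigh") entries live in tbl->phash_buckets, a fixed
 * (PNEIGH_HASHMASK + 1)-bucket hash. */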
2419 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2420 {
2421 	struct neigh_seq_state *state = seq->private;
2422 	struct net *net = seq_file_net(seq);
2423 	struct neigh_table *tbl = state->tbl;
2424 	struct pneigh_entry *pn = NULL;
2425 	int bucket;
2426 
2427 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2428 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2429 		pn = tbl->phash_buckets[bucket];
2430 		while (pn && !net_eq(pneigh_net(pn), net))
2431 			pn = pn->next;
2432 		if (pn)
2433 			break;
2434 	}
2435 	state->bucket = bucket;
2436 
2437 	return pn;
2438 }
2439 
2440 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2441 					    struct pneigh_entry *pn,
2442 					    loff_t *pos)
2443 {
2444 	struct neigh_seq_state *state = seq->private;
2445 	struct net *net = seq_file_net(seq);
2446 	struct neigh_table *tbl = state->tbl;
2447 
2448 	pn = pn->next;
2449 	while (!pn) {
2450 		if (++state->bucket > PNEIGH_HASHMASK)
2451 			break;
2452 		pn = tbl->phash_buckets[state->bucket];
2453 		while (pn && !net_eq(pneigh_net(pn), net))
2454 			pn = pn->next;
2455 		if (pn)
2456 			break;
2457 	}
2458 
2459 	if (pn && pos)
2460 		--(*pos);
2461 
2462 	return pn;
2463 }
2464 
2465 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2466 {
2467 	struct pneigh_entry *pn = pneigh_get_first(seq);
2468 
2469 	if (pn) {
2470 		--(*pos);
2471 		while (*pos) {
2472 			pn = pneigh_get_next(seq, pn, pos);
2473 			if (!pn)
2474 				break;
2475 		}
2476 	}
2477 	return *pos ? NULL : pn;
2478 }
2479 
2480 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2481 {
2482 	struct neigh_seq_state *state = seq->private;
2483 	void *rc;
2484 	loff_t idxpos = *pos;
2485 
2486 	rc = neigh_get_idx(seq, &idxpos);
2487 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2488 		rc = pneigh_get_idx(seq, &idxpos);
2489 
2490 	return rc;
2491 }
2492 
2493 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2494 	__acquires(rcu_bh)
2495 {
2496 	struct neigh_seq_state *state = seq->private;
2497 
2498 	state->tbl = tbl;
2499 	state->bucket = 0;
2500 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2501 
2502 	rcu_read_lock_bh();
2503 	state->nht = rcu_dereference_bh(tbl->nht);
2504 
2505 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2506 }
2507 EXPORT_SYMBOL(neigh_seq_start);
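
/*
 * Usage sketch (illustrative, hypothetical table name): a protocol's /proc
 * seq_file wraps these helpers around its own table,
 *
 *	static void *example_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &example_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next() and neigh_seq_stop() plugged into .next and .stop.
 */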
2508 
2509 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2510 {
2511 	struct neigh_seq_state *state;
2512 	void *rc;
2513 
2514 	if (v == SEQ_START_TOKEN) {
2515 		rc = neigh_get_first(seq);
2516 		goto out;
2517 	}
2518 
2519 	state = seq->private;
2520 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2521 		rc = neigh_get_next(seq, v, NULL);
2522 		if (rc)
2523 			goto out;
2524 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2525 			rc = pneigh_get_first(seq);
2526 	} else {
2527 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2528 		rc = pneigh_get_next(seq, v, NULL);
2529 	}
2530 out:
2531 	++(*pos);
2532 	return rc;
2533 }
2534 EXPORT_SYMBOL(neigh_seq_next);
2535 
2536 void neigh_seq_stop(struct seq_file *seq, void *v)
2537 	__releases(rcu_bh)
2538 {
2539 	rcu_read_unlock_bh();
2540 }
2541 EXPORT_SYMBOL(neigh_seq_stop);
2542 
2543 /* statistics via seq_file */
2544 
2545 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2546 {
2547 	struct neigh_table *tbl = seq->private;
2548 	int cpu;
2549 
2550 	if (*pos == 0)
2551 		return SEQ_START_TOKEN;
2552 
2553 	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
2554 		if (!cpu_possible(cpu))
2555 			continue;
2556 		*pos = cpu + 1;
2557 		return per_cpu_ptr(tbl->stats, cpu);
2558 	}
2559 	return NULL;
2560 }
2561 
2562 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563 {
2564 	struct neigh_table *tbl = seq->private;
2565 	int cpu;
2566 
2567 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2568 		if (!cpu_possible(cpu))
2569 			continue;
2570 		*pos = cpu + 1;
2571 		return per_cpu_ptr(tbl->stats, cpu);
2572 	}
2573 	return NULL;
2574 }
2575 
2576 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2577 {
2578 
2579 }
2580 
2581 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2582 {
2583 	struct neigh_table *tbl = seq->private;
2584 	struct neigh_statistics *st = v;
2585 
2586 	if (v == SEQ_START_TOKEN) {
2587 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2588 		return 0;
2589 	}
2590 
2591 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2592 			"%08lx %08lx  %08lx %08lx %08lx\n",
2593 		   atomic_read(&tbl->entries),
2594 
2595 		   st->allocs,
2596 		   st->destroys,
2597 		   st->hash_grows,
2598 
2599 		   st->lookups,
2600 		   st->hits,
2601 
2602 		   st->res_failed,
2603 
2604 		   st->rcv_probes_mcast,
2605 		   st->rcv_probes_ucast,
2606 
2607 		   st->periodic_gc_runs,
2608 		   st->forced_gc_runs,
2609 		   st->unres_discards
2610 		   );
2611 
2612 	return 0;
2613 }
2614 
2615 static const struct seq_operations neigh_stat_seq_ops = {
2616 	.start	= neigh_stat_seq_start,
2617 	.next	= neigh_stat_seq_next,
2618 	.stop	= neigh_stat_seq_stop,
2619 	.show	= neigh_stat_seq_show,
2620 };
2621 
2622 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2623 {
2624 	int ret = seq_open(file, &neigh_stat_seq_ops);
2625 
2626 	if (!ret) {
2627 		struct seq_file *sf = file->private_data;
2628 		sf->private = PDE(inode)->data;
2629 	}
2630 	return ret;
2631 }
2632 
2633 static const struct file_operations neigh_stat_seq_fops = {
2634 	.owner	 = THIS_MODULE,
2635 	.open 	 = neigh_stat_seq_open,
2636 	.read	 = seq_read,
2637 	.llseek	 = seq_lseek,
2638 	.release = seq_release,
2639 };
2640 
2641 #endif /* CONFIG_PROC_FS */
2642 
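/* Worst-case payload for one neighbour message; __neigh_notify() sizes its
 * skb allocation with this. */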
2643 static inline size_t neigh_nlmsg_size(void)
2644 {
2645 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2646 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2647 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2648 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2649 	       + nla_total_size(4); /* NDA_PROBES */
2650 }
2651 
2652 static void __neigh_notify(struct neighbour *n, int type, int flags)
2653 {
2654 	struct net *net = dev_net(n->dev);
2655 	struct sk_buff *skb;
2656 	int err = -ENOBUFS;
2657 
2658 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2659 	if (skb == NULL)
2660 		goto errout;
2661 
2662 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2663 	if (err < 0) {
2664 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2665 		WARN_ON(err == -EMSGSIZE);
2666 		kfree_skb(skb);
2667 		goto errout;
2668 	}
2669 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2670 	return;
2671 errout:
2672 	if (err < 0)
2673 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2674 }
2675 
2676 #ifdef CONFIG_ARPD
2677 void neigh_app_ns(struct neighbour *n)
2678 {
2679 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2680 }
2681 EXPORT_SYMBOL(neigh_app_ns);
2682 #endif /* CONFIG_ARPD */
2683 
2684 #ifdef CONFIG_SYSCTL
2685 
2686 #define NEIGH_VARS_MAX 19
2687 
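/*
 * NOTE: neigh_sysctl_register() assigns .data pointers to these entries by
 * array index, so the order of neigh_vars[] must stay in sync with the
 * index assignments there.
 */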
2688 static struct neigh_sysctl_table {
2689 	struct ctl_table_header *sysctl_header;
2690 	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2691 	char *dev_name;
2692 } neigh_sysctl_template __read_mostly = {
2693 	.neigh_vars = {
2694 		{
2695 			.procname	= "mcast_solicit",
2696 			.maxlen		= sizeof(int),
2697 			.mode		= 0644,
2698 			.proc_handler	= proc_dointvec,
2699 		},
2700 		{
2701 			.procname	= "ucast_solicit",
2702 			.maxlen		= sizeof(int),
2703 			.mode		= 0644,
2704 			.proc_handler	= proc_dointvec,
2705 		},
2706 		{
2707 			.procname	= "app_solicit",
2708 			.maxlen		= sizeof(int),
2709 			.mode		= 0644,
2710 			.proc_handler	= proc_dointvec,
2711 		},
2712 		{
2713 			.procname	= "retrans_time",
2714 			.maxlen		= sizeof(int),
2715 			.mode		= 0644,
2716 			.proc_handler	= proc_dointvec_userhz_jiffies,
2717 		},
2718 		{
2719 			.procname	= "base_reachable_time",
2720 			.maxlen		= sizeof(int),
2721 			.mode		= 0644,
2722 			.proc_handler	= proc_dointvec_jiffies,
2723 		},
2724 		{
2725 			.procname	= "delay_first_probe_time",
2726 			.maxlen		= sizeof(int),
2727 			.mode		= 0644,
2728 			.proc_handler	= proc_dointvec_jiffies,
2729 		},
2730 		{
2731 			.procname	= "gc_stale_time",
2732 			.maxlen		= sizeof(int),
2733 			.mode		= 0644,
2734 			.proc_handler	= proc_dointvec_jiffies,
2735 		},
2736 		{
2737 			.procname	= "unres_qlen",
2738 			.maxlen		= sizeof(int),
2739 			.mode		= 0644,
2740 			.proc_handler	= proc_dointvec,
2741 		},
2742 		{
2743 			.procname	= "proxy_qlen",
2744 			.maxlen		= sizeof(int),
2745 			.mode		= 0644,
2746 			.proc_handler	= proc_dointvec,
2747 		},
2748 		{
2749 			.procname	= "anycast_delay",
2750 			.maxlen		= sizeof(int),
2751 			.mode		= 0644,
2752 			.proc_handler	= proc_dointvec_userhz_jiffies,
2753 		},
2754 		{
2755 			.procname	= "proxy_delay",
2756 			.maxlen		= sizeof(int),
2757 			.mode		= 0644,
2758 			.proc_handler	= proc_dointvec_userhz_jiffies,
2759 		},
2760 		{
2761 			.procname	= "locktime",
2762 			.maxlen		= sizeof(int),
2763 			.mode		= 0644,
2764 			.proc_handler	= proc_dointvec_userhz_jiffies,
2765 		},
2766 		{
2767 			.procname	= "retrans_time_ms",
2768 			.maxlen		= sizeof(int),
2769 			.mode		= 0644,
2770 			.proc_handler	= proc_dointvec_ms_jiffies,
2771 		},
2772 		{
2773 			.procname	= "base_reachable_time_ms",
2774 			.maxlen		= sizeof(int),
2775 			.mode		= 0644,
2776 			.proc_handler	= proc_dointvec_ms_jiffies,
2777 		},
2778 		{
2779 			.procname	= "gc_interval",
2780 			.maxlen		= sizeof(int),
2781 			.mode		= 0644,
2782 			.proc_handler	= proc_dointvec_jiffies,
2783 		},
2784 		{
2785 			.procname	= "gc_thresh1",
2786 			.maxlen		= sizeof(int),
2787 			.mode		= 0644,
2788 			.proc_handler	= proc_dointvec,
2789 		},
2790 		{
2791 			.procname	= "gc_thresh2",
2792 			.maxlen		= sizeof(int),
2793 			.mode		= 0644,
2794 			.proc_handler	= proc_dointvec,
2795 		},
2796 		{
2797 			.procname	= "gc_thresh3",
2798 			.maxlen		= sizeof(int),
2799 			.mode		= 0644,
2800 			.proc_handler	= proc_dointvec,
2801 		},
2802 		{},
2803 	},
2804 };
2805 
2806 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2807 			  char *p_name, proc_handler *handler)
2808 {
2809 	struct neigh_sysctl_table *t;
2810 	const char *dev_name_source = NULL;
2811 
2812 #define NEIGH_CTL_PATH_ROOT	0
2813 #define NEIGH_CTL_PATH_PROTO	1
2814 #define NEIGH_CTL_PATH_NEIGH	2
2815 #define NEIGH_CTL_PATH_DEV	3
2816 
2817 	struct ctl_path neigh_path[] = {
2818 		{ .procname = "net",	 },
2819 		{ .procname = "proto",	 },
2820 		{ .procname = "neigh",	 },
2821 		{ .procname = "default", },
2822 		{ },
2823 	};
2824 
2825 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2826 	if (!t)
2827 		goto err;
2828 
2829 	t->neigh_vars[0].data  = &p->mcast_probes;
2830 	t->neigh_vars[1].data  = &p->ucast_probes;
2831 	t->neigh_vars[2].data  = &p->app_probes;
2832 	t->neigh_vars[3].data  = &p->retrans_time;
2833 	t->neigh_vars[4].data  = &p->base_reachable_time;
2834 	t->neigh_vars[5].data  = &p->delay_probe_time;
2835 	t->neigh_vars[6].data  = &p->gc_staletime;
2836 	t->neigh_vars[7].data  = &p->queue_len;
2837 	t->neigh_vars[8].data  = &p->proxy_qlen;
2838 	t->neigh_vars[9].data  = &p->anycast_delay;
2839 	t->neigh_vars[10].data = &p->proxy_delay;
2840 	t->neigh_vars[11].data = &p->locktime;
2841 	t->neigh_vars[12].data = &p->retrans_time;
2842 	t->neigh_vars[13].data = &p->base_reachable_time;
2843 
2844 	if (dev) {
2845 		dev_name_source = dev->name;
2846 		/* Terminate the table early */
2847 		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2848 	} else {
2849 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2850 		t->neigh_vars[14].data = (int *)(p + 1);
2851 		t->neigh_vars[15].data = (int *)(p + 1) + 1;
2852 		t->neigh_vars[16].data = (int *)(p + 1) + 2;
2853 		t->neigh_vars[17].data = (int *)(p + 1) + 3;
2854 	}
2855 
2857 	if (handler) {
2858 		/* RetransTime */
2859 		t->neigh_vars[3].proc_handler = handler;
2860 		t->neigh_vars[3].extra1 = dev;
2861 		/* ReachableTime */
2862 		t->neigh_vars[4].proc_handler = handler;
2863 		t->neigh_vars[4].extra1 = dev;
2864 		/* RetransTime (in milliseconds)*/
2865 		t->neigh_vars[12].proc_handler = handler;
2866 		t->neigh_vars[12].extra1 = dev;
2867 		/* ReachableTime (in milliseconds) */
2868 		t->neigh_vars[13].proc_handler = handler;
2869 		t->neigh_vars[13].extra1 = dev;
2870 	}
2871 
2872 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2873 	if (!t->dev_name)
2874 		goto free;
2875 
2876 	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2877 	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2878 
2879 	t->sysctl_header =
2880 		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2881 	if (!t->sysctl_header)
2882 		goto free_procname;
2883 
2884 	p->sysctl_table = t;
2885 	return 0;
2886 
2887 free_procname:
2888 	kfree(t->dev_name);
2889 free:
2890 	kfree(t);
2891 err:
2892 	return -ENOBUFS;
2893 }
2894 EXPORT_SYMBOL(neigh_sysctl_register);
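
/*
 * Usage sketch (illustrative, hypothetical names): a protocol registers its
 * default parameter set once,
 *
 *	neigh_sysctl_register(NULL, &example_tbl.parms, "example", NULL);
 *
 * which creates tunables under /proc/sys/net/example/neigh/default/, and
 * calls it again with a real device to get a per-device subdirectory.
 */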
2895 
2896 void neigh_sysctl_unregister(struct neigh_parms *p)
2897 {
2898 	if (p->sysctl_table) {
2899 		struct neigh_sysctl_table *t = p->sysctl_table;
2900 		p->sysctl_table = NULL;
2901 		unregister_sysctl_table(t->sysctl_header);
2902 		kfree(t->dev_name);
2903 		kfree(t);
2904 	}
2905 }
2906 EXPORT_SYMBOL(neigh_sysctl_unregister);
2907 
2908 #endif	/* CONFIG_SYSCTL */
2909 
2910 static int __init neigh_init(void)
2911 {
2912 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2913 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2914 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2915 
2916 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2917 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2918 
2919 	return 0;
2920 }
2921 
2922 subsys_initcall(neigh_init);
2923 
2924