/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     That would result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock protecting
   the list of neighbour tables; the list is used only in process context.
 */
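
/* A minimal sketch (not part of this file; "lookup_under_lock" is a
 * hypothetical helper) of the pattern prescribed above: take a
 * reference while the table lock is held, drop the lock, and only then
 * do anything non-trivial with the entry.
 *
 *	struct neighbour *n;
 *
 *	write_lock_bh(&tbl->lock);
 *	n = lookup_under_lock(tbl, key, dev);	// scan buckets under tbl->lock
 *	if (n)
 *		neigh_hold(n);			// refcount prevents destruction
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		n->output(skb);			// non-trivial work, no table lock held
 *		neigh_release(n);
 *	}
 */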

static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base;
 * for example, with base = 30*HZ the result is uniform over [15*HZ, 45*HZ).
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);


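/* Walk the whole hash table and unlink every entry that is neither
 * referenced nor permanent. Returns 1 if at least one entry was freed.
 * Called from neigh_alloc() when the table exceeds its GC thresholds.
 */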
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i <= nht->hash_mask; i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

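/* Unlink every entry that belongs to @dev (all entries when @dev is
 * NULL), marking strays dead. Caller must hold tbl->lock write-locked.
 */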
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i <= nht->hash_mask; i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

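/* Allocate a new cache entry, running a forced GC pass first when the
 * table is over gc_thresh2/gc_thresh3. Returns NULL if the allocation
 * fails or the table is still over gc_thresh3 after GC.
 */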
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
	size_t size = entries * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_mask = entries - 1;
	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_entries)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	BUG_ON(!is_power_of_2(new_entries));
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_entries);
	if (!new_nht)
		return old_nht;

	for (i = 0; i <= old_nht->hash_mask; i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash &= new_nht->hash_mask;
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
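
/* A typical lookup-or-create sequence for callers of the two functions
 * above (a sketch; "key" and "dev" stand for the caller's protocol
 * address and device):
 *
 *	n = neigh_lookup(tbl, key, dev);
 *	if (!n) {
 *		n = neigh_create(tbl, key, dev);
 *		if (IS_ERR(n))
 *			return PTR_ERR(n);
 *	}
 *	...use n, then drop the reference...
 *	neigh_release(n);
 */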

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

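/* Look up a proxy entry; if none is found and @creat is non-zero,
 * allocate and insert a new one. The creation path requires the RTNL
 * lock to be held.
 */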
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

static void neigh_destroy_rcu(struct rcu_head *head)
{
	struct neighbour *neigh = container_of(head, struct neighbour, rcu);

	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/*
 *	The neighbour entry must already be unlinked from the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;

		write_seqlock_bh(&hh->hh_lock);
		hh->hh_output = neigh_blackhole;
		write_sequnlock_bh(&hh->hh_lock);
		hh_cache_put(hh);
	}

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->hh_output;
}

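/* Periodic garbage collection: re-randomize reachable_time roughly
 * every 5 minutes and drop unreferenced entries that are FAILED or
 * have gone unused longer than gc_staletime. Reschedules itself.
 */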
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i <= nht->hash_mask; i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
	}
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE) ?
		p->ucast_probes :
		p->ucast_probes + p->app_probes + p->mcast_probes;
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate place. report_unreachable is a very
	   complicated routine; in particular, it can hit the same
	   neighbour entry!

	   So we try to be careful and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
		/* keep skb alive even if arp_queue overflows */
		if (skb)
			skb = skb_copy(skb, GFP_ATOMIC);
		write_unlock(&neigh->lock);
		neigh->ops->solicit(neigh, skb);
		atomic_inc(&neigh->probes);
		kfree_skb(skb);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

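/* Kick off resolution for @neigh if it is not usable yet. Returns 0
 * when the caller may transmit immediately; returns 1 when the skb has
 * been consumed (queued on the arp_queue pending resolution, or freed
 * because resolution failed).
 */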
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	unsigned long now;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	now = jiffies;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = jiffies;
			neigh_add_timer(neigh, now + 1);
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			if (skb_queue_len(&neigh->arp_queue) >=
			    neigh->parms->queue_len) {
				struct sk_buff *buff;
				buff = __skb_dequeue(&neigh->arp_queue);
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
		}
		rc = 1;
	}
out_unlock_bh:
	write_unlock_bh(&neigh->lock);
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(const struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		for (hh = neigh->hh; hh; hh = hh->hh_next) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}



/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				to be a router.

   Caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct neighbour *n1 = neigh;
			write_unlock_bh(&neigh->lock);
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (skb_dst(skb) && skb_dst(skb)->neighbour)
				n1 = skb_dst(skb)->neighbour;
			n1->output(skb);
			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
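
/* A minimal usage sketch (hypothetical caller): administratively pin a
 * new link-layer address on an entry the caller holds a reference to.
 *
 *	err = neigh_update(n, new_lladdr, NUD_PERMANENT,
 *			   NEIGH_UPDATE_F_OVERRIDE |
 *			   NEIGH_UPDATE_F_ADMIN);
 */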

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
				   __be16 protocol)
{
	struct hh_cache *hh;

	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
	for (hh = n->hh; hh; hh = hh->hh_next) {
		if (hh->hh_type == protocol) {
			atomic_inc(&hh->hh_refcnt);
			if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
				hh_cache_put(hh);
			return true;
		}
	}
	return false;
}

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
			  __be16 protocol)
{
	struct hh_cache	*hh;
	struct net_device *dev = dst->dev;

	if (likely(neigh_hh_lookup(n, dst, protocol)))
		return;

	/* slow path */
	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
	if (!hh)
		return;

	seqlock_init(&hh->hh_lock);
	hh->hh_type = protocol;
	atomic_set(&hh->hh_refcnt, 2);

	if (dev->header_ops->cache(n, hh)) {
		kfree(hh);
		return;
	}

	write_lock_bh(&n->lock);

	/* must check if another thread already did the insert */
	if (neigh_hh_lookup(n, dst, protocol)) {
		kfree(hh);
		goto end;
	}

	if (n->nud_state & NUD_CONNECTED)
		hh->hh_output = n->ops->hh_output;
	else
		hh->hh_output = n->ops->output;

	hh->hh_next = n->hh;
	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
	n->hh	    = hh;

	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
		hh_cache_put(hh);
end:
	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old
 * dev_queue_xmit worked, e.g. if you want to override the normal
 * output path (eql, shaper), but resolution has not been made yet.
 */

int neigh_compat_output(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb_network_offset(skb));

	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev->header_ops->rebuild(skb))
		return 0;

	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);

/* Slow and careful. */

int neigh_resolve_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct neighbour *neigh;
	int rc = 0;

	if (!dst || !(neigh = dst->neighbour))
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache &&
		    !dst->hh &&
		    !(dst->flags & DST_NOCACHE))
			neigh_hh_init(neigh, dst, dst->ops->protocol);

		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = neigh->ops->queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, dst ? dst->neighbour : NULL);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct sk_buff *skb)
{
	int err;
	struct dst_entry *dst = skb_dst(skb);
	struct neighbour *neigh = dst->neighbour;
	struct net_device *dev = neigh->dev;
	unsigned int seq;

	__skb_pull(skb, skb_network_offset(skb));

	do {
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = neigh->ops->queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

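/* Proxy timer handler: deliver (or drop) the queued skbs whose
 * scheduled time has arrived, and re-arm the timer for the earliest
 * remaining one.
 */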
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;
			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev))
				tbl->proxy_redo(skb);
			else
				kfree_skb(skb);

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

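/* Queue @skb for delayed proxy processing, with a random delay of up
 * to proxy_delay jiffies. The skb is dropped when the proxy queue is
 * already longer than proxy_qlen.
 */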
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex))
			return p;
	}

	return NULL;
}

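/* Clone the table's default parameters for @dev and link the copy into
 * the tbl->parms list; returns NULL if the device's ndo_neigh_setup
 * hook rejects the new parameters or memory runs out.
 */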
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	if (!tbl->kmem_cachep)
		tbl->kmem_cachep =
			kmem_cache_create(tbl->id, tbl->entry_size, 0,
					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					  NULL);
	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	kmem_cache_destroy(tbl->kmem_cachep);
	tbl->kmem_cachep = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

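/* RTM_DELNEIGH handler: remove a (proxy) neighbour entry on behalf of
 * a netlink request; an entry is deleted by forcing it into NUD_FAILED.
 */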
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}

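/* RTM_NEWNEIGH handler: create or update a (proxy) neighbour entry on
 * behalf of a netlink request, honouring NLM_F_CREATE/EXCL/REPLACE.
 */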
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = nht->hash_mask;
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2075 
2076 	err = 0;
2077 
2078 errout_tbl_lock:
2079 	write_unlock_bh(&tbl->lock);
2080 errout_locked:
2081 	read_unlock(&neigh_tbl_lock);
2082 errout:
2083 	return err;
2084 }
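
/*
 * Userspace counterpart to neightbl_set() (illustrative sketch; the
 * helper addattr() below is hypothetical and skips bounds checks).
 * The attribute layout must satisfy nl_neightbl_policy above.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void addattr(struct nlmsghdr *n, int type, const void *data, int len)
{
	struct rtattr *rta;

	rta = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));
	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	memcpy(RTA_DATA(rta), data, len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

static int set_gc_thresh1(int fd, const char *table, __u32 thresh)
{
	struct {
		struct nlmsghdr nlh;
		struct ndtmsg ndtm;
		char attrbuf[256];	/* room for the attributes */
	} req;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
	req.nlh.nlmsg_type = RTM_SETNEIGHTBL;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.ndtm.ndtm_family = AF_UNSPEC;

	/* NDTA_NAME selects the table; NDTA_THRESH1 is a plain u32. */
	addattr(&req.nlh, NDTA_NAME, table, strlen(table) + 1);
	addattr(&req.nlh, NDTA_THRESH1, &thresh, sizeof(thresh));

	return send(fd, &req, req.nlh.nlmsg_len, 0);
}
#endif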
2085 
2086 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2087 {
2088 	struct net *net = sock_net(skb->sk);
2089 	int family, tidx, nidx = 0;
2090 	int tbl_skip = cb->args[0];
2091 	int neigh_skip = cb->args[1];
2092 	struct neigh_table *tbl;
2093 
2094 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2095 
2096 	read_lock(&neigh_tbl_lock);
2097 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2098 		struct neigh_parms *p;
2099 
2100 		if (tidx < tbl_skip || (family && tbl->family != family))
2101 			continue;
2102 
2103 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2104 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2105 				       NLM_F_MULTI) <= 0)
2106 			break;
2107 
2108 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2109 			if (!net_eq(neigh_parms_net(p), net))
2110 				continue;
2111 
2112 			if (nidx < neigh_skip)
2113 				goto next;
2114 
2115 			if (neightbl_fill_param_info(skb, tbl, p,
2116 						     NETLINK_CB(cb->skb).pid,
2117 						     cb->nlh->nlmsg_seq,
2118 						     RTM_NEWNEIGHTBL,
2119 						     NLM_F_MULTI) <= 0)
2120 				goto out;
2121 		next:
2122 			nidx++;
2123 		}
2124 
2125 		neigh_skip = 0;
2126 	}
2127 out:
2128 	read_unlock(&neigh_tbl_lock);
2129 	cb->args[0] = tidx;
2130 	cb->args[1] = nidx;
2131 
2132 	return skb->len;
2133 }
2134 
2135 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2136 			   u32 pid, u32 seq, int type, unsigned int flags)
2137 {
2138 	unsigned long now = jiffies;
2139 	struct nda_cacheinfo ci;
2140 	struct nlmsghdr *nlh;
2141 	struct ndmsg *ndm;
2142 
2143 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2144 	if (nlh == NULL)
2145 		return -EMSGSIZE;
2146 
2147 	ndm = nlmsg_data(nlh);
2148 	ndm->ndm_family	 = neigh->ops->family;
2149 	ndm->ndm_pad1    = 0;
2150 	ndm->ndm_pad2    = 0;
2151 	ndm->ndm_flags	 = neigh->flags;
2152 	ndm->ndm_type	 = neigh->type;
2153 	ndm->ndm_ifindex = neigh->dev->ifindex;
2154 
2155 	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2156 
2157 	read_lock_bh(&neigh->lock);
2158 	ndm->ndm_state	 = neigh->nud_state;
2159 	if (neigh->nud_state & NUD_VALID) {
2160 		char haddr[MAX_ADDR_LEN];
2161 
2162 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164 			read_unlock_bh(&neigh->lock);
2165 			goto nla_put_failure;
2166 		}
2167 	}
2168 
2169 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2170 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2171 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2172 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2173 	read_unlock_bh(&neigh->lock);
2174 
2175 	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2176 	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2177 
2178 	return nlmsg_end(skb, nlh);
2179 
2180 nla_put_failure:
2181 	nlmsg_cancel(skb, nlh);
2182 	return -EMSGSIZE;
2183 }
2184 
2185 static void neigh_update_notify(struct neighbour *neigh)
2186 {
2187 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2188 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2189 }
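
/*
 * In-kernel consumers can observe these updates through the netevent
 * notifier chain.  A sketch (hypothetical names; registration error
 * handling omitted):
 */
#if 0
static int my_neigh_event(struct notifier_block *nb,
			  unsigned long event, void *ptr)
{
	if (event == NETEVENT_NEIGH_UPDATE) {
		struct neighbour *n = ptr;
		/* Inspect n->nud_state etc.; atomic context, do not sleep. */
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_neigh_nb = {
	.notifier_call = my_neigh_event,
};

/* register_netevent_notifier(&my_neigh_nb) at module init,
 * unregister_netevent_notifier(&my_neigh_nb) at exit. */
#endif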
2190 
2191 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2192 			    struct netlink_callback *cb)
2193 {
2194 	struct net *net = sock_net(skb->sk);
2195 	struct neighbour *n;
2196 	int rc, h, s_h = cb->args[1];
2197 	int idx, s_idx = idx = cb->args[2];	/* resume from prior dump pass */
2198 	struct neigh_hash_table *nht;
2199 
2200 	rcu_read_lock_bh();
2201 	nht = rcu_dereference_bh(tbl->nht);
2202 
2203 	for (h = 0; h <= nht->hash_mask; h++) {
2204 		if (h < s_h)
2205 			continue;
2206 		if (h > s_h)
2207 			s_idx = 0;
2208 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2209 		     n != NULL;
2210 		     n = rcu_dereference_bh(n->next)) {
2211 			if (!net_eq(dev_net(n->dev), net))
2212 				continue;
2213 			if (idx < s_idx)
2214 				goto next;
2215 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2216 					    cb->nlh->nlmsg_seq,
2217 					    RTM_NEWNEIGH,
2218 					    NLM_F_MULTI) <= 0) {
2219 				rc = -1;
2220 				goto out;
2221 			}
2222 next:
2223 			idx++;
2224 		}
2225 	}
2226 	rc = skb->len;
2227 out:
2228 	rcu_read_unlock_bh();
2229 	cb->args[1] = h;
2230 	cb->args[2] = idx;
2231 	return rc;
2232 }
2233 
2234 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2235 {
2236 	struct neigh_table *tbl;
2237 	int t, family, s_t;
2238 
2239 	read_lock(&neigh_tbl_lock);
2240 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2241 	s_t = cb->args[0];
2242 
2243 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2244 		if (t < s_t || (family && tbl->family != family))
2245 			continue;
2246 		if (t > s_t)
2247 			memset(&cb->args[1], 0, sizeof(cb->args) -
2248 						sizeof(cb->args[0]));
2249 		if (neigh_dump_table(tbl, skb, cb) < 0)
2250 			break;
2251 	}
2252 	read_unlock(&neigh_tbl_lock);
2253 
2254 	cb->args[0] = t;
2255 	return skb->len;
2256 }
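
/*
 * Userspace view of the dump above -- essentially what `ip neigh show`
 * issues.  Illustrative sketch: a single recv() and one-message
 * attribute walk, no multipart handling.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void dump_neighbours(int fd)
{
	struct {
		struct nlmsghdr nlh;
		struct ndmsg ndm;
	} req;
	char buf[16384];
	int len;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
	req.nlh.nlmsg_type = RTM_GETNEIGH;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ndm.ndm_family = AF_UNSPEC;
	send(fd, &req, req.nlh.nlmsg_len, 0);

	len = recv(fd, buf, sizeof(buf), 0);
	if (len > 0) {
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
			struct ndmsg *ndm = NLMSG_DATA(nlh);
			/* ndmsg is already NLMSG_ALIGN'ed, attrs follow it */
			struct rtattr *rta = (struct rtattr *)(ndm + 1);
			int alen = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ndm));

			for (; RTA_OK(rta, alen); rta = RTA_NEXT(rta, alen)) {
				/* rta->rta_type is NDA_DST, NDA_LLADDR,
				 * NDA_CACHEINFO or NDA_PROBES, matching
				 * neigh_fill_info() above. */
			}
		}
	}
}
#endif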
2257 
2258 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2259 {
2260 	int chain;
2261 	struct neigh_hash_table *nht;
2262 
2263 	rcu_read_lock_bh();
2264 	nht = rcu_dereference_bh(tbl->nht);
2265 
2266 	read_lock(&tbl->lock); /* avoid resizes */
2267 	for (chain = 0; chain <= nht->hash_mask; chain++) {
2268 		struct neighbour *n;
2269 
2270 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2271 		     n != NULL;
2272 		     n = rcu_dereference_bh(n->next))
2273 			cb(n, cookie);
2274 	}
2275 	read_unlock(&tbl->lock);
2276 	rcu_read_unlock_bh();
2277 }
2278 EXPORT_SYMBOL(neigh_for_each);
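
/*
 * Sketch of a neigh_for_each() caller (hypothetical names): count the
 * entries attached to one device.  The callback runs under tbl->lock,
 * so it must not sleep or modify the table.
 */
#if 0
struct neigh_count_arg {
	struct net_device *dev;
	int count;
};

static void neigh_count_cb(struct neighbour *n, void *cookie)
{
	struct neigh_count_arg *arg = cookie;

	if (n->dev == arg->dev)
		arg->count++;
}

/* struct neigh_count_arg arg = { .dev = dev };
 * neigh_for_each(tbl, neigh_count_cb, &arg); */
#endif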
2279 
2280 /* The tbl->lock must be held as a writer and BH disabled. */
2281 void __neigh_for_each_release(struct neigh_table *tbl,
2282 			      int (*cb)(struct neighbour *))
2283 {
2284 	int chain;
2285 	struct neigh_hash_table *nht;
2286 
2287 	nht = rcu_dereference_protected(tbl->nht,
2288 					lockdep_is_held(&tbl->lock));
2289 	for (chain = 0; chain <= nht->hash_mask; chain++) {
2290 		struct neighbour *n;
2291 		struct neighbour __rcu **np;
2292 
2293 		np = &nht->hash_buckets[chain];
2294 		while ((n = rcu_dereference_protected(*np,
2295 					lockdep_is_held(&tbl->lock))) != NULL) {
2296 			int release;
2297 
2298 			write_lock(&n->lock);
2299 			release = cb(n);
2300 			if (release) {
2301 				rcu_assign_pointer(*np,
2302 					rcu_dereference_protected(n->next,
2303 						lockdep_is_held(&tbl->lock)));
2304 				n->dead = 1;
2305 			} else
2306 				np = &n->next;
2307 			write_unlock(&n->lock);
2308 			if (release)
2309 				neigh_cleanup_and_release(n);
2310 		}
2311 	}
2312 }
2313 EXPORT_SYMBOL(__neigh_for_each_release);
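
/*
 * Sketch of an __neigh_for_each_release() callback (hypothetical
 * name).  It is invoked with n->lock write-held; returning nonzero
 * unlinks the entry, after which the core marks it dead and calls
 * neigh_cleanup_and_release() on it.
 */
#if 0
static int neigh_release_if_unused(struct neighbour *n)
{
	/* Only the table itself holds a reference. */
	return atomic_read(&n->refcnt) == 1;
}

/* write_lock_bh(&tbl->lock);
 * __neigh_for_each_release(tbl, neigh_release_if_unused);
 * write_unlock_bh(&tbl->lock); */
#endif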
2314 
2315 #ifdef CONFIG_PROC_FS
2316 
2317 static struct neighbour *neigh_get_first(struct seq_file *seq)
2318 {
2319 	struct neigh_seq_state *state = seq->private;
2320 	struct net *net = seq_file_net(seq);
2321 	struct neigh_hash_table *nht = state->nht;
2322 	struct neighbour *n = NULL;
2323 	int bucket = state->bucket;
2324 
2325 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2326 	for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
2327 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2328 
2329 		while (n) {
2330 			if (!net_eq(dev_net(n->dev), net))
2331 				goto next;
2332 			if (state->neigh_sub_iter) {
2333 				loff_t fakep = 0;
2334 				void *v;
2335 
2336 				v = state->neigh_sub_iter(state, n, &fakep);
2337 				if (!v)
2338 					goto next;
2339 			}
2340 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2341 				break;
2342 			if (n->nud_state & ~NUD_NOARP)
2343 				break;
2344 next:
2345 			n = rcu_dereference_bh(n->next);
2346 		}
2347 
2348 		if (n)
2349 			break;
2350 	}
2351 	state->bucket = bucket;
2352 
2353 	return n;
2354 }
2355 
2356 static struct neighbour *neigh_get_next(struct seq_file *seq,
2357 					struct neighbour *n,
2358 					loff_t *pos)
2359 {
2360 	struct neigh_seq_state *state = seq->private;
2361 	struct net *net = seq_file_net(seq);
2362 	struct neigh_hash_table *nht = state->nht;
2363 
2364 	if (state->neigh_sub_iter) {
2365 		void *v = state->neigh_sub_iter(state, n, pos);
2366 		if (v)
2367 			return n;
2368 	}
2369 	n = rcu_dereference_bh(n->next);
2370 
2371 	while (1) {
2372 		while (n) {
2373 			if (!net_eq(dev_net(n->dev), net))
2374 				goto next;
2375 			if (state->neigh_sub_iter) {
2376 				void *v = state->neigh_sub_iter(state, n, pos);
2377 				if (v)
2378 					return n;
2379 				goto next;
2380 			}
2381 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2382 				break;
2383 
2384 			if (n->nud_state & ~NUD_NOARP)
2385 				break;
2386 next:
2387 			n = rcu_dereference_bh(n->next);
2388 		}
2389 
2390 		if (n)
2391 			break;
2392 
2393 		if (++state->bucket > nht->hash_mask)
2394 			break;
2395 
2396 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2397 	}
2398 
2399 	if (n && pos)
2400 		--(*pos);
2401 	return n;
2402 }
2403 
2404 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2405 {
2406 	struct neighbour *n = neigh_get_first(seq);
2407 
2408 	if (n) {
2409 		--(*pos);
2410 		while (*pos) {
2411 			n = neigh_get_next(seq, n, pos);
2412 			if (!n)
2413 				break;
2414 		}
2415 	}
2416 	return *pos ? NULL : n;
2417 }
2418 
2419 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2420 {
2421 	struct neigh_seq_state *state = seq->private;
2422 	struct net *net = seq_file_net(seq);
2423 	struct neigh_table *tbl = state->tbl;
2424 	struct pneigh_entry *pn = NULL;
2425 	int bucket = state->bucket;
2426 
2427 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2428 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2429 		pn = tbl->phash_buckets[bucket];
2430 		while (pn && !net_eq(pneigh_net(pn), net))
2431 			pn = pn->next;
2432 		if (pn)
2433 			break;
2434 	}
2435 	state->bucket = bucket;
2436 
2437 	return pn;
2438 }
2439 
2440 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2441 					    struct pneigh_entry *pn,
2442 					    loff_t *pos)
2443 {
2444 	struct neigh_seq_state *state = seq->private;
2445 	struct net *net = seq_file_net(seq);
2446 	struct neigh_table *tbl = state->tbl;
2447 
2448 	pn = pn->next;
2449 	while (!pn) {
2450 		if (++state->bucket > PNEIGH_HASHMASK)
2451 			break;
2452 		pn = tbl->phash_buckets[state->bucket];
2453 		while (pn && !net_eq(pneigh_net(pn), net))
2454 			pn = pn->next;
2455 		if (pn)
2456 			break;
2457 	}
2458 
2459 	if (pn && pos)
2460 		--(*pos);
2461 
2462 	return pn;
2463 }
2464 
2465 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2466 {
2467 	struct pneigh_entry *pn = pneigh_get_first(seq);
2468 
2469 	if (pn) {
2470 		--(*pos);
2471 		while (*pos) {
2472 			pn = pneigh_get_next(seq, pn, pos);
2473 			if (!pn)
2474 				break;
2475 		}
2476 	}
2477 	return *pos ? NULL : pn;
2478 }
2479 
2480 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2481 {
2482 	struct neigh_seq_state *state = seq->private;
2483 	void *rc;
2484 	loff_t idxpos = *pos;
2485 
2486 	rc = neigh_get_idx(seq, &idxpos);
2487 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2488 		rc = pneigh_get_idx(seq, &idxpos);
2489 
2490 	return rc;
2491 }
2492 
2493 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2494 	__acquires(rcu_bh)
2495 {
2496 	struct neigh_seq_state *state = seq->private;
2497 
2498 	state->tbl = tbl;
2499 	state->bucket = 0;
2500 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2501 
2502 	rcu_read_lock_bh();
2503 	state->nht = rcu_dereference_bh(tbl->nht);
2504 
2505 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2506 }
2507 EXPORT_SYMBOL(neigh_seq_start);
2508 
2509 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2510 {
2511 	struct neigh_seq_state *state;
2512 	void *rc;
2513 
2514 	if (v == SEQ_START_TOKEN) {
2515 		rc = neigh_get_first(seq);
2516 		goto out;
2517 	}
2518 
2519 	state = seq->private;
2520 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2521 		rc = neigh_get_next(seq, v, NULL);
2522 		if (rc)
2523 			goto out;
2524 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2525 			rc = pneigh_get_first(seq);
2526 	} else {
2527 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2528 		rc = pneigh_get_next(seq, v, NULL);
2529 	}
2530 out:
2531 	++(*pos);
2532 	return rc;
2533 }
2534 EXPORT_SYMBOL(neigh_seq_next);
2535 
2536 void neigh_seq_stop(struct seq_file *seq, void *v)
2537 	__releases(rcu_bh)
2538 {
2539 	rcu_read_unlock_bh();
2540 }
2541 EXPORT_SYMBOL(neigh_seq_stop);
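
/*
 * How a protocol wires these iterators up, modeled on IPv4 ARP
 * (proto_tbl and proto_seq_show are hypothetical placeholders):
 */
#if 0
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* Skip NUD_NOARP entries, as /proc/net/arp does. */
	return neigh_seq_start(seq, pos, &proto_tbl, NEIGH_SEQ_SKIP_NOARP);
}

static const struct seq_operations proto_seq_ops = {
	.start	= proto_seq_start,
	.next	= neigh_seq_next,
	.stop	= neigh_seq_stop,
	.show	= proto_seq_show,	/* protocol-specific formatting */
};

/* The file must be opened with
 * seq_open_net(inode, file, &proto_seq_ops, sizeof(struct neigh_seq_state))
 * so that seq->private is the neigh_seq_state these iterators expect. */
#endif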
2542 
2543 /* statistics via seq_file */
2544 
2545 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2546 {
2547 	struct neigh_table *tbl = seq->private;
2548 	int cpu;
2549 
2550 	if (*pos == 0)
2551 		return SEQ_START_TOKEN;
2552 
2553 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2554 		if (!cpu_possible(cpu))
2555 			continue;
2556 		*pos = cpu+1;
2557 		return per_cpu_ptr(tbl->stats, cpu);
2558 	}
2559 	return NULL;
2560 }
2561 
2562 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563 {
2564 	struct neigh_table *tbl = seq->private;
2565 	int cpu;
2566 
2567 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2568 		if (!cpu_possible(cpu))
2569 			continue;
2570 		*pos = cpu+1;
2571 		return per_cpu_ptr(tbl->stats, cpu);
2572 	}
2573 	return NULL;
2574 }
2575 
2576 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2577 {
2578 
2579 }
2580 
2581 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2582 {
2583 	struct neigh_table *tbl = seq->private;
2584 	struct neigh_statistics *st = v;
2585 
2586 	if (v == SEQ_START_TOKEN) {
2587 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2588 		return 0;
2589 	}
2590 
2591 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2592 			"%08lx %08lx  %08lx %08lx %08lx\n",
2593 		   atomic_read(&tbl->entries),
2594 
2595 		   st->allocs,
2596 		   st->destroys,
2597 		   st->hash_grows,
2598 
2599 		   st->lookups,
2600 		   st->hits,
2601 
2602 		   st->res_failed,
2603 
2604 		   st->rcv_probes_mcast,
2605 		   st->rcv_probes_ucast,
2606 
2607 		   st->periodic_gc_runs,
2608 		   st->forced_gc_runs,
2609 		   st->unres_discards
2610 		   );
2611 
2612 	return 0;
2613 }
2614 
2615 static const struct seq_operations neigh_stat_seq_ops = {
2616 	.start	= neigh_stat_seq_start,
2617 	.next	= neigh_stat_seq_next,
2618 	.stop	= neigh_stat_seq_stop,
2619 	.show	= neigh_stat_seq_show,
2620 };
2621 
2622 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2623 {
2624 	int ret = seq_open(file, &neigh_stat_seq_ops);
2625 
2626 	if (!ret) {
2627 		struct seq_file *sf = file->private_data;
2628 		sf->private = PDE(inode)->data;
2629 	}
2630 	return ret;
2631 }
2632 
2633 static const struct file_operations neigh_stat_seq_fops = {
2634 	.owner	 = THIS_MODULE,
2635 	.open	 = neigh_stat_seq_open,
2636 	.read	 = seq_read,
2637 	.llseek	 = seq_lseek,
2638 	.release = seq_release,
2639 };
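
/*
 * Each neighbour table exposes one such stats file under
 * /proc/net/stat.  A sketch of the registration (compare the call
 * made at table-init time elsewhere in this file):
 */
#if 0
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		/* registration failed */;
#endif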
2640 
2641 #endif /* CONFIG_PROC_FS */
2642 
2643 static inline size_t neigh_nlmsg_size(void)
2644 {
2645 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2646 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2647 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2648 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2649 	       + nla_total_size(4); /* NDA_PROBES */
2650 }
2651 
2652 static void __neigh_notify(struct neighbour *n, int type, int flags)
2653 {
2654 	struct net *net = dev_net(n->dev);
2655 	struct sk_buff *skb;
2656 	int err = -ENOBUFS;
2657 
2658 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2659 	if (skb == NULL)
2660 		goto errout;
2661 
2662 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2663 	if (err < 0) {
2664 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2665 		WARN_ON(err == -EMSGSIZE);
2666 		kfree_skb(skb);
2667 		goto errout;
2668 	}
2669 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2670 	return;
2671 errout:
2672 	if (err < 0)
2673 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2674 }
2675 
2676 #ifdef CONFIG_ARPD
2677 void neigh_app_ns(struct neighbour *n)
2678 {
2679 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2680 }
2681 EXPORT_SYMBOL(neigh_app_ns);
2682 #endif /* CONFIG_ARPD */
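
/*
 * Userspace side of neigh_app_ns(): an arpd-style daemon joins the
 * neighbour multicast group and watches for RTM_GETNEIGH application
 * probes (illustrative sketch, no message parsing shown).
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int open_neigh_monitor(void)
{
	struct sockaddr_nl snl;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&snl, 0, sizeof(snl));
	snl.nl_family = AF_NETLINK;
	snl.nl_groups = RTMGRP_NEIGH;	/* neighbour events + app probes */

	if (bind(fd, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
#endif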
2683 
2684 #ifdef CONFIG_SYSCTL
2685 
2686 #define NEIGH_VARS_MAX 19
2687 
2688 static struct neigh_sysctl_table {
2689 	struct ctl_table_header *sysctl_header;
2690 	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2691 	char *dev_name;
2692 } neigh_sysctl_template __read_mostly = {
2693 	.neigh_vars = {
2694 		{
2695 			.procname	= "mcast_solicit",
2696 			.maxlen		= sizeof(int),
2697 			.mode		= 0644,
2698 			.proc_handler	= proc_dointvec,
2699 		},
2700 		{
2701 			.procname	= "ucast_solicit",
2702 			.maxlen		= sizeof(int),
2703 			.mode		= 0644,
2704 			.proc_handler	= proc_dointvec,
2705 		},
2706 		{
2707 			.procname	= "app_solicit",
2708 			.maxlen		= sizeof(int),
2709 			.mode		= 0644,
2710 			.proc_handler	= proc_dointvec,
2711 		},
2712 		{
2713 			.procname	= "retrans_time",
2714 			.maxlen		= sizeof(int),
2715 			.mode		= 0644,
2716 			.proc_handler	= proc_dointvec_userhz_jiffies,
2717 		},
2718 		{
2719 			.procname	= "base_reachable_time",
2720 			.maxlen		= sizeof(int),
2721 			.mode		= 0644,
2722 			.proc_handler	= proc_dointvec_jiffies,
2723 		},
2724 		{
2725 			.procname	= "delay_first_probe_time",
2726 			.maxlen		= sizeof(int),
2727 			.mode		= 0644,
2728 			.proc_handler	= proc_dointvec_jiffies,
2729 		},
2730 		{
2731 			.procname	= "gc_stale_time",
2732 			.maxlen		= sizeof(int),
2733 			.mode		= 0644,
2734 			.proc_handler	= proc_dointvec_jiffies,
2735 		},
2736 		{
2737 			.procname	= "unres_qlen",
2738 			.maxlen		= sizeof(int),
2739 			.mode		= 0644,
2740 			.proc_handler	= proc_dointvec,
2741 		},
2742 		{
2743 			.procname	= "proxy_qlen",
2744 			.maxlen		= sizeof(int),
2745 			.mode		= 0644,
2746 			.proc_handler	= proc_dointvec,
2747 		},
2748 		{
2749 			.procname	= "anycast_delay",
2750 			.maxlen		= sizeof(int),
2751 			.mode		= 0644,
2752 			.proc_handler	= proc_dointvec_userhz_jiffies,
2753 		},
2754 		{
2755 			.procname	= "proxy_delay",
2756 			.maxlen		= sizeof(int),
2757 			.mode		= 0644,
2758 			.proc_handler	= proc_dointvec_userhz_jiffies,
2759 		},
2760 		{
2761 			.procname	= "locktime",
2762 			.maxlen		= sizeof(int),
2763 			.mode		= 0644,
2764 			.proc_handler	= proc_dointvec_userhz_jiffies,
2765 		},
2766 		{
2767 			.procname	= "retrans_time_ms",
2768 			.maxlen		= sizeof(int),
2769 			.mode		= 0644,
2770 			.proc_handler	= proc_dointvec_ms_jiffies,
2771 		},
2772 		{
2773 			.procname	= "base_reachable_time_ms",
2774 			.maxlen		= sizeof(int),
2775 			.mode		= 0644,
2776 			.proc_handler	= proc_dointvec_ms_jiffies,
2777 		},
2778 		{
2779 			.procname	= "gc_interval",
2780 			.maxlen		= sizeof(int),
2781 			.mode		= 0644,
2782 			.proc_handler	= proc_dointvec_jiffies,
2783 		},
2784 		{
2785 			.procname	= "gc_thresh1",
2786 			.maxlen		= sizeof(int),
2787 			.mode		= 0644,
2788 			.proc_handler	= proc_dointvec,
2789 		},
2790 		{
2791 			.procname	= "gc_thresh2",
2792 			.maxlen		= sizeof(int),
2793 			.mode		= 0644,
2794 			.proc_handler	= proc_dointvec,
2795 		},
2796 		{
2797 			.procname	= "gc_thresh3",
2798 			.maxlen		= sizeof(int),
2799 			.mode		= 0644,
2800 			.proc_handler	= proc_dointvec,
2801 		},
2802 		{},
2803 	},
2804 };
2805 
2806 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2807 			  char *p_name, proc_handler *handler)
2808 {
2809 	struct neigh_sysctl_table *t;
2810 	const char *dev_name_source = NULL;
2811 
2812 #define NEIGH_CTL_PATH_ROOT	0
2813 #define NEIGH_CTL_PATH_PROTO	1
2814 #define NEIGH_CTL_PATH_NEIGH	2
2815 #define NEIGH_CTL_PATH_DEV	3
2816 
2817 	struct ctl_path neigh_path[] = {
2818 		{ .procname = "net",	 },
2819 		{ .procname = "proto",	 },
2820 		{ .procname = "neigh",	 },
2821 		{ .procname = "default", },
2822 		{ },
2823 	};
2824 
2825 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2826 	if (!t)
2827 		goto err;
2828 
2829 	t->neigh_vars[0].data  = &p->mcast_probes;
2830 	t->neigh_vars[1].data  = &p->ucast_probes;
2831 	t->neigh_vars[2].data  = &p->app_probes;
2832 	t->neigh_vars[3].data  = &p->retrans_time;
2833 	t->neigh_vars[4].data  = &p->base_reachable_time;
2834 	t->neigh_vars[5].data  = &p->delay_probe_time;
2835 	t->neigh_vars[6].data  = &p->gc_staletime;
2836 	t->neigh_vars[7].data  = &p->queue_len;
2837 	t->neigh_vars[8].data  = &p->proxy_qlen;
2838 	t->neigh_vars[9].data  = &p->anycast_delay;
2839 	t->neigh_vars[10].data = &p->proxy_delay;
2840 	t->neigh_vars[11].data = &p->locktime;
2841 	t->neigh_vars[12].data = &p->retrans_time;
2842 	t->neigh_vars[13].data = &p->base_reachable_time;
2843 
2844 	if (dev) {
2845 		dev_name_source = dev->name;
2846 		/* Terminate the table early */
2847 		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2848 	} else {
2849 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2850 		t->neigh_vars[14].data = (int *)(p + 1);
2851 		t->neigh_vars[15].data = (int *)(p + 1) + 1;
2852 		t->neigh_vars[16].data = (int *)(p + 1) + 2;
2853 		t->neigh_vars[17].data = (int *)(p + 1) + 3;
2854 	}
2855 
2856 
2857 	if (handler) {
2858 		/* RetransTime */
2859 		t->neigh_vars[3].proc_handler = handler;
2860 		t->neigh_vars[3].extra1 = dev;
2861 		/* ReachableTime */
2862 		t->neigh_vars[4].proc_handler = handler;
2863 		t->neigh_vars[4].extra1 = dev;
2864 		/* RetransTime (in milliseconds) */
2865 		t->neigh_vars[12].proc_handler = handler;
2866 		t->neigh_vars[12].extra1 = dev;
2867 		/* ReachableTime (in milliseconds) */
2868 		t->neigh_vars[13].proc_handler = handler;
2869 		t->neigh_vars[13].extra1 = dev;
2870 	}
2871 
2872 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2873 	if (!t->dev_name)
2874 		goto free;
2875 
2876 	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2877 	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2878 
2879 	t->sysctl_header =
2880 		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2881 	if (!t->sysctl_header)
2882 		goto free_procname;
2883 
2884 	p->sysctl_table = t;
2885 	return 0;
2886 
2887 free_procname:
2888 	kfree(t->dev_name);
2889 free:
2890 	kfree(t);
2891 err:
2892 	return -ENOBUFS;
2893 }
2894 EXPORT_SYMBOL(neigh_sysctl_register);
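
/*
 * Typical caller, modeled on IPv4 ARP: register the default parms
 * under net.ipv4.neigh.default.  A NULL dev and NULL handler select
 * the "default" path component and the stock proc handlers above.
 */
#if 0
	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
#endif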
2895 
2896 void neigh_sysctl_unregister(struct neigh_parms *p)
2897 {
2898 	if (p->sysctl_table) {
2899 		struct neigh_sysctl_table *t = p->sysctl_table;
2900 		p->sysctl_table = NULL;
2901 		unregister_sysctl_table(t->sysctl_header);
2902 		kfree(t->dev_name);
2903 		kfree(t);
2904 	}
2905 }
2906 EXPORT_SYMBOL(neigh_sysctl_unregister);
2907 
2908 #endif	/* CONFIG_SYSCTL */
2909 
2910 static int __init neigh_init(void)
2911 {
2912 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2913 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2914 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2915 
2916 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2917 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2918 
2919 	return 0;
2920 }
2921 
2922 subsys_initcall(neigh_init);
2923 
2924