xref: /openbmc/linux/net/core/neighbour.c (revision 5c25f686db352082eef8daa21b760192351a023a)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 
40 #define NEIGH_DEBUG 1
41 
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46 
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55 
56 #define PNEIGH_HASHMASK		0xF
57 
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62 
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67 
68 /*
69    Neighbour hash table buckets are protected with the rwlock tbl->lock.
70 
71    - All scans of and updates to the hash buckets MUST be made under this lock.
72    - NOTHING clever should be done under this lock: no callbacks
73      into protocol backends, no attempts to send anything to the network.
74      Doing so will deadlock if the backend/driver wants to use the
75      neighbour cache.
76    - If an entry requires some non-trivial action, increase
77      its reference count and release the table lock.
78 
79    Neighbour entries are protected:
80    - by their reference count.
81    - by the rwlock neigh->lock.
82 
83    The reference count prevents destruction.
84 
85    neigh->lock mainly serializes the link-layer address data and its
86    validity state. However, the same lock also protects other entry fields:
87     - the timer
88     - the resolution queue
89 
90    Again, nothing clever shall be done under neigh->lock;
91    the most complicated operation we allow there is dev->hard_header.
92    dev->hard_header is assumed to be simple and to make no
93    callbacks into the neighbour tables.
94 
95    The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
96    list of neighbour tables. This list is used only in process context.
97  */
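/*
   A minimal illustrative sketch of the rule above (not a function in this
   file): to do something non-trivial with an entry found during a bucket
   scan, pin it with a reference, drop the table lock, then act on it.

	write_lock_bh(&tbl->lock);
	n = <entry found in some hash bucket>;
	neigh_hold(n);			pin: destruction is now impossible
	write_unlock_bh(&tbl->lock);
	<callbacks into backends, transmissions, ...>
	neigh_release(n);		drop the pinning reference
 */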
98 
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100 
101 static int neigh_blackhole(struct sk_buff *skb)
102 {
103 	kfree_skb(skb);
104 	return -ENETDOWN;
105 }
106 
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109 	if (neigh->parms->neigh_cleanup)
110 		neigh->parms->neigh_cleanup(neigh);
111 
112 	__neigh_notify(neigh, RTM_DELNEIGH, 0);
113 	neigh_release(neigh);
114 }
115 
116 /*
117  * Returns a value uniformly distributed in the interval
118  * [base/2, 3*base/2). This matches the default IPv6 setting and is not
119  * overridable, because it is a really reasonable choice.
120  */
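/*
 * Worked example: with base = 30 * HZ, net_random() % base lies in
 * [0, 30*HZ), and adding base >> 1 = 15*HZ shifts the result into
 * [15*HZ, 45*HZ), i.e. between half and one-and-a-half times base.
 */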
121 
122 unsigned long neigh_rand_reach_time(unsigned long base)
123 {
124 	return base ? (net_random() % base) + (base >> 1) : 0;
125 }
126 EXPORT_SYMBOL(neigh_rand_reach_time);
127 
128 
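/*
 * Synchronous garbage collection: walk every hash bucket and drop all
 * unreferenced, non-permanent entries.  Called from neigh_alloc() when
 * the table grows past gc_thresh2/gc_thresh3.  Returns 1 if at least one
 * entry was freed.
 */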
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131 	int shrunk = 0;
132 	int i;
133 	struct neigh_hash_table *nht;
134 
135 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136 
137 	write_lock_bh(&tbl->lock);
138 	nht = rcu_dereference_protected(tbl->nht,
139 					lockdep_is_held(&tbl->lock));
140 	for (i = 0; i < (1 << nht->hash_shift); i++) {
141 		struct neighbour *n;
142 		struct neighbour __rcu **np;
143 
144 		np = &nht->hash_buckets[i];
145 		while ((n = rcu_dereference_protected(*np,
146 					lockdep_is_held(&tbl->lock))) != NULL) {
147 			/* A neighbour record may be discarded if:
148 			 * - nobody holds a reference to it, and
149 			 * - it is not permanent.
150 			 */
151 			write_lock(&n->lock);
152 			if (atomic_read(&n->refcnt) == 1 &&
153 			    !(n->nud_state & NUD_PERMANENT)) {
154 				rcu_assign_pointer(*np,
155 					rcu_dereference_protected(n->next,
156 						  lockdep_is_held(&tbl->lock)));
157 				n->dead = 1;
158 				shrunk	= 1;
159 				write_unlock(&n->lock);
160 				neigh_cleanup_and_release(n);
161 				continue;
162 			}
163 			write_unlock(&n->lock);
164 			np = &n->next;
165 		}
166 	}
167 
168 	tbl->last_flush = jiffies;
169 
170 	write_unlock_bh(&tbl->lock);
171 
172 	return shrunk;
173 }
174 
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177 	neigh_hold(n);
178 	if (unlikely(mod_timer(&n->timer, when))) {
179 		printk("NEIGH: BUG, double timer add, state is %x\n",
180 		       n->nud_state);
181 		dump_stack();
182 	}
183 }
184 
185 static int neigh_del_timer(struct neighbour *n)
186 {
187 	if ((n->nud_state & NUD_IN_TIMER) &&
188 	    del_timer(&n->timer)) {
189 		neigh_release(n);
190 		return 1;
191 	}
192 	return 0;
193 }
194 
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197 	struct sk_buff *skb;
198 
199 	while ((skb = skb_dequeue(list)) != NULL) {
200 		dev_put(skb->dev);
201 		kfree_skb(skb);
202 	}
203 }
204 
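/*
 * Unlink every entry belonging to @dev (or all entries if @dev is NULL)
 * from the hash table and mark it dead.  Entries still referenced by
 * others are parked in a harmless state (output = neigh_blackhole) and
 * freed later by the final neigh_release().  The caller must hold
 * tbl->lock for writing.
 */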
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207 	int i;
208 	struct neigh_hash_table *nht;
209 
210 	nht = rcu_dereference_protected(tbl->nht,
211 					lockdep_is_held(&tbl->lock));
212 
213 	for (i = 0; i < (1 << nht->hash_shift); i++) {
214 		struct neighbour *n;
215 		struct neighbour __rcu **np = &nht->hash_buckets[i];
216 
217 		while ((n = rcu_dereference_protected(*np,
218 					lockdep_is_held(&tbl->lock))) != NULL) {
219 			if (dev && n->dev != dev) {
220 				np = &n->next;
221 				continue;
222 			}
223 			rcu_assign_pointer(*np,
224 				   rcu_dereference_protected(n->next,
225 						lockdep_is_held(&tbl->lock)));
226 			write_lock(&n->lock);
227 			neigh_del_timer(n);
228 			n->dead = 1;
229 
230 			if (atomic_read(&n->refcnt) != 1) {
231 				/* The most unpleasant situation:
232 				   we must destroy the neighbour entry,
233 				   but someone is still using it.
234 
235 				   Destruction will be delayed until
236 				   the last user releases the entry, but
237 				   we must kill the timers etc. and move
238 				   it to a safe state.
239 				 */
240 				skb_queue_purge(&n->arp_queue);
241 				n->output = neigh_blackhole;
242 				if (n->nud_state & NUD_VALID)
243 					n->nud_state = NUD_NOARP;
244 				else
245 					n->nud_state = NUD_NONE;
246 				NEIGH_PRINTK2("neigh %p is stray.\n", n);
247 			}
248 			write_unlock(&n->lock);
249 			neigh_cleanup_and_release(n);
250 		}
251 	}
252 }
253 
254 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
255 {
256 	write_lock_bh(&tbl->lock);
257 	neigh_flush_dev(tbl, dev);
258 	write_unlock_bh(&tbl->lock);
259 }
260 EXPORT_SYMBOL(neigh_changeaddr);
261 
262 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
263 {
264 	write_lock_bh(&tbl->lock);
265 	neigh_flush_dev(tbl, dev);
266 	pneigh_ifdown(tbl, dev);
267 	write_unlock_bh(&tbl->lock);
268 
269 	del_timer_sync(&tbl->proxy_timer);
270 	pneigh_queue_purge(&tbl->proxy_queue);
271 	return 0;
272 }
273 EXPORT_SYMBOL(neigh_ifdown);
274 
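/*
 * Allocate a new neighbour entry.  The entry counter is bumped first; if
 * that puts the table over gc_thresh3, or over gc_thresh2 with no flush
 * within the last 5 seconds, a forced GC is attempted, and the allocation
 * fails if the table is still over gc_thresh3 afterwards.  The new entry
 * starts out dead (n->dead = 1) until neigh_create() hashes it in.
 */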
275 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
276 {
277 	struct neighbour *n = NULL;
278 	unsigned long now = jiffies;
279 	int entries;
280 
281 	entries = atomic_inc_return(&tbl->entries) - 1;
282 	if (entries >= tbl->gc_thresh3 ||
283 	    (entries >= tbl->gc_thresh2 &&
284 	     time_after(now, tbl->last_flush + 5 * HZ))) {
285 		if (!neigh_forced_gc(tbl) &&
286 		    entries >= tbl->gc_thresh3)
287 			goto out_entries;
288 	}
289 
290 	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
291 	if (!n)
292 		goto out_entries;
293 
294 	skb_queue_head_init(&n->arp_queue);
295 	rwlock_init(&n->lock);
296 	seqlock_init(&n->ha_lock);
297 	n->updated	  = n->used = now;
298 	n->nud_state	  = NUD_NONE;
299 	n->output	  = neigh_blackhole;
300 	n->parms	  = neigh_parms_clone(&tbl->parms);
301 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
302 
303 	NEIGH_CACHE_STAT_INC(tbl, allocs);
304 	n->tbl		  = tbl;
305 	atomic_set(&n->refcnt, 1);
306 	n->dead		  = 1;
307 out:
308 	return n;
309 
310 out_entries:
311 	atomic_dec(&tbl->entries);
312 	goto out;
313 }
314 
315 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
316 {
317 	size_t size = (1 << shift) * sizeof(struct neighbour *);
318 	struct neigh_hash_table *ret;
319 	struct neighbour __rcu **buckets;
320 
321 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
322 	if (!ret)
323 		return NULL;
324 	if (size <= PAGE_SIZE)
325 		buckets = kzalloc(size, GFP_ATOMIC);
326 	else
327 		buckets = (struct neighbour __rcu **)
328 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
329 					   get_order(size));
330 	if (!buckets) {
331 		kfree(ret);
332 		return NULL;
333 	}
334 	ret->hash_buckets = buckets;
335 	ret->hash_shift = shift;
336 	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
337 	ret->hash_rnd |= 1;
338 	return ret;
339 }
340 
341 static void neigh_hash_free_rcu(struct rcu_head *head)
342 {
343 	struct neigh_hash_table *nht = container_of(head,
344 						    struct neigh_hash_table,
345 						    rcu);
346 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
347 	struct neighbour __rcu **buckets = nht->hash_buckets;
348 
349 	if (size <= PAGE_SIZE)
350 		kfree(buckets);
351 	else
352 		free_pages((unsigned long)buckets, get_order(size));
353 	kfree(nht);
354 }
355 
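/*
 * Rehash all entries into a new bucket array of 2^new_shift slots.  Runs
 * under tbl->lock held for writing while lockless readers keep using the
 * old array via RCU; the old array is freed after a grace period.  On
 * allocation failure the old table is returned unchanged.
 */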
356 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
357 						unsigned long new_shift)
358 {
359 	unsigned int i, hash;
360 	struct neigh_hash_table *new_nht, *old_nht;
361 
362 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
363 
364 	old_nht = rcu_dereference_protected(tbl->nht,
365 					    lockdep_is_held(&tbl->lock));
366 	new_nht = neigh_hash_alloc(new_shift);
367 	if (!new_nht)
368 		return old_nht;
369 
370 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
371 		struct neighbour *n, *next;
372 
373 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
374 						   lockdep_is_held(&tbl->lock));
375 		     n != NULL;
376 		     n = next) {
377 			hash = tbl->hash(n->primary_key, n->dev,
378 					 new_nht->hash_rnd);
379 
380 			hash >>= (32 - new_nht->hash_shift);
381 			next = rcu_dereference_protected(n->next,
382 						lockdep_is_held(&tbl->lock));
383 
384 			rcu_assign_pointer(n->next,
385 					   rcu_dereference_protected(
386 						new_nht->hash_buckets[hash],
387 						lockdep_is_held(&tbl->lock)));
388 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
389 		}
390 	}
391 
392 	rcu_assign_pointer(tbl->nht, new_nht);
393 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
394 	return new_nht;
395 }
396 
397 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
398 			       struct net_device *dev)
399 {
400 	struct neighbour *n;
401 	int key_len = tbl->key_len;
402 	u32 hash_val;
403 	struct neigh_hash_table *nht;
404 
405 	NEIGH_CACHE_STAT_INC(tbl, lookups);
406 
407 	rcu_read_lock_bh();
408 	nht = rcu_dereference_bh(tbl->nht);
409 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
410 
411 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
412 	     n != NULL;
413 	     n = rcu_dereference_bh(n->next)) {
414 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
415 			if (!atomic_inc_not_zero(&n->refcnt))
416 				n = NULL;
417 			NEIGH_CACHE_STAT_INC(tbl, hits);
418 			break;
419 		}
420 	}
421 
422 	rcu_read_unlock_bh();
423 	return n;
424 }
425 EXPORT_SYMBOL(neigh_lookup);
426 
427 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
428 				     const void *pkey)
429 {
430 	struct neighbour *n;
431 	int key_len = tbl->key_len;
432 	u32 hash_val;
433 	struct neigh_hash_table *nht;
434 
435 	NEIGH_CACHE_STAT_INC(tbl, lookups);
436 
437 	rcu_read_lock_bh();
438 	nht = rcu_dereference_bh(tbl->nht);
439 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
440 
441 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
442 	     n != NULL;
443 	     n = rcu_dereference_bh(n->next)) {
444 		if (!memcmp(n->primary_key, pkey, key_len) &&
445 		    net_eq(dev_net(n->dev), net)) {
446 			if (!atomic_inc_not_zero(&n->refcnt))
447 				n = NULL;
448 			NEIGH_CACHE_STAT_INC(tbl, hits);
449 			break;
450 		}
451 	}
452 
453 	rcu_read_unlock_bh();
454 	return n;
455 }
456 EXPORT_SYMBOL(neigh_lookup_nodev);
457 
458 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
459 			       struct net_device *dev)
460 {
461 	u32 hash_val;
462 	int key_len = tbl->key_len;
463 	int error;
464 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
465 	struct neigh_hash_table *nht;
466 
467 	if (!n) {
468 		rc = ERR_PTR(-ENOBUFS);
469 		goto out;
470 	}
471 
472 	memcpy(n->primary_key, pkey, key_len);
473 	n->dev = dev;
474 	dev_hold(dev);
475 
476 	/* Protocol specific setup. */
477 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
478 		rc = ERR_PTR(error);
479 		goto out_neigh_release;
480 	}
481 
482 	/* Device specific setup. */
483 	if (n->parms->neigh_setup &&
484 	    (error = n->parms->neigh_setup(n)) < 0) {
485 		rc = ERR_PTR(error);
486 		goto out_neigh_release;
487 	}
488 
489 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
490 
491 	write_lock_bh(&tbl->lock);
492 	nht = rcu_dereference_protected(tbl->nht,
493 					lockdep_is_held(&tbl->lock));
494 
495 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
496 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
497 
498 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
499 
500 	if (n->parms->dead) {
501 		rc = ERR_PTR(-EINVAL);
502 		goto out_tbl_unlock;
503 	}
504 
505 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
506 					    lockdep_is_held(&tbl->lock));
507 	     n1 != NULL;
508 	     n1 = rcu_dereference_protected(n1->next,
509 			lockdep_is_held(&tbl->lock))) {
510 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
511 			neigh_hold(n1);
512 			rc = n1;
513 			goto out_tbl_unlock;
514 		}
515 	}
516 
517 	n->dead = 0;
518 	neigh_hold(n);
519 	rcu_assign_pointer(n->next,
520 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
521 						     lockdep_is_held(&tbl->lock)));
522 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
523 	write_unlock_bh(&tbl->lock);
524 	NEIGH_PRINTK2("neigh %p is created.\n", n);
525 	rc = n;
526 out:
527 	return rc;
528 out_tbl_unlock:
529 	write_unlock_bh(&tbl->lock);
530 out_neigh_release:
531 	neigh_release(n);
532 	goto out;
533 }
534 EXPORT_SYMBOL(neigh_create);
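/*
 * Typical caller pattern (an illustrative sketch, not code from this
 * file; arp_tbl and the IPv4 nexthop key are borrowed from ARP):
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &nexthop, dev);
 *	if (!n)
 *		n = neigh_create(&arp_tbl, &nexthop, dev);
 *	if (IS_ERR(n))
 *		return PTR_ERR(n);
 *	... transmit via n->output() ...
 *	neigh_release(n);
 *
 * neigh_create() re-checks the bucket under tbl->lock, so a concurrent
 * creator simply gets the already-inserted entry back.
 */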
535 
536 static u32 pneigh_hash(const void *pkey, int key_len)
537 {
538 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
539 	hash_val ^= (hash_val >> 16);
540 	hash_val ^= hash_val >> 8;
541 	hash_val ^= hash_val >> 4;
542 	hash_val &= PNEIGH_HASHMASK;
543 	return hash_val;
544 }
545 
546 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
547 					      struct net *net,
548 					      const void *pkey,
549 					      int key_len,
550 					      struct net_device *dev)
551 {
552 	while (n) {
553 		if (!memcmp(n->key, pkey, key_len) &&
554 		    net_eq(pneigh_net(n), net) &&
555 		    (n->dev == dev || !n->dev))
556 			return n;
557 		n = n->next;
558 	}
559 	return NULL;
560 }
561 
562 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
563 		struct net *net, const void *pkey, struct net_device *dev)
564 {
565 	int key_len = tbl->key_len;
566 	u32 hash_val = pneigh_hash(pkey, key_len);
567 
568 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
569 				 net, pkey, key_len, dev);
570 }
571 EXPORT_SYMBOL_GPL(__pneigh_lookup);
572 
573 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
574 				    struct net *net, const void *pkey,
575 				    struct net_device *dev, int creat)
576 {
577 	struct pneigh_entry *n;
578 	int key_len = tbl->key_len;
579 	u32 hash_val = pneigh_hash(pkey, key_len);
580 
581 	read_lock_bh(&tbl->lock);
582 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
583 			      net, pkey, key_len, dev);
584 	read_unlock_bh(&tbl->lock);
585 
586 	if (n || !creat)
587 		goto out;
588 
589 	ASSERT_RTNL();
590 
591 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
592 	if (!n)
593 		goto out;
594 
595 	write_pnet(&n->net, hold_net(net));
596 	memcpy(n->key, pkey, key_len);
597 	n->dev = dev;
598 	if (dev)
599 		dev_hold(dev);
600 
601 	if (tbl->pconstructor && tbl->pconstructor(n)) {
602 		if (dev)
603 			dev_put(dev);
604 		release_net(net);
605 		kfree(n);
606 		n = NULL;
607 		goto out;
608 	}
609 
610 	write_lock_bh(&tbl->lock);
611 	n->next = tbl->phash_buckets[hash_val];
612 	tbl->phash_buckets[hash_val] = n;
613 	write_unlock_bh(&tbl->lock);
614 out:
615 	return n;
616 }
617 EXPORT_SYMBOL(pneigh_lookup);
618 
619 
620 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
621 		  struct net_device *dev)
622 {
623 	struct pneigh_entry *n, **np;
624 	int key_len = tbl->key_len;
625 	u32 hash_val = pneigh_hash(pkey, key_len);
626 
627 	write_lock_bh(&tbl->lock);
628 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
629 	     np = &n->next) {
630 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
631 		    net_eq(pneigh_net(n), net)) {
632 			*np = n->next;
633 			write_unlock_bh(&tbl->lock);
634 			if (tbl->pdestructor)
635 				tbl->pdestructor(n);
636 			if (n->dev)
637 				dev_put(n->dev);
638 			release_net(pneigh_net(n));
639 			kfree(n);
640 			return 0;
641 		}
642 	}
643 	write_unlock_bh(&tbl->lock);
644 	return -ENOENT;
645 }
646 
647 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
648 {
649 	struct pneigh_entry *n, **np;
650 	u32 h;
651 
652 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
653 		np = &tbl->phash_buckets[h];
654 		while ((n = *np) != NULL) {
655 			if (!dev || n->dev == dev) {
656 				*np = n->next;
657 				if (tbl->pdestructor)
658 					tbl->pdestructor(n);
659 				if (n->dev)
660 					dev_put(n->dev);
661 				release_net(pneigh_net(n));
662 				kfree(n);
663 				continue;
664 			}
665 			np = &n->next;
666 		}
667 	}
668 	return -ENOENT;
669 }
670 
671 static void neigh_parms_destroy(struct neigh_parms *parms);
672 
673 static inline void neigh_parms_put(struct neigh_parms *parms)
674 {
675 	if (atomic_dec_and_test(&parms->refcnt))
676 		neigh_parms_destroy(parms);
677 }
678 
679 static void neigh_destroy_rcu(struct rcu_head *head)
680 {
681 	struct neighbour *neigh = container_of(head, struct neighbour, rcu);
682 
683 	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
684 }
685 /*
686  *	The neighbour must already be out of the table;
687  *	its reference count has reached zero.
688  */
689 void neigh_destroy(struct neighbour *neigh)
690 {
691 	struct hh_cache *hh;
692 
693 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
694 
695 	if (!neigh->dead) {
696 		printk(KERN_WARNING
697 		       "Destroying alive neighbour %p\n", neigh);
698 		dump_stack();
699 		return;
700 	}
701 
702 	if (neigh_del_timer(neigh))
703 		printk(KERN_WARNING "Impossible event.\n");
704 
705 	hh = neigh->hh;
706 	if (hh) {
707 		neigh->hh = NULL;
708 
709 		write_seqlock_bh(&hh->hh_lock);
710 		hh->hh_output = neigh_blackhole;
711 		write_sequnlock_bh(&hh->hh_lock);
712 		hh_cache_put(hh);
713 	}
714 
715 	skb_queue_purge(&neigh->arp_queue);
716 
717 	dev_put(neigh->dev);
718 	neigh_parms_put(neigh->parms);
719 
720 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
721 
722 	atomic_dec(&neigh->tbl->entries);
723 	call_rcu(&neigh->rcu, neigh_destroy_rcu);
724 }
725 EXPORT_SYMBOL(neigh_destroy);
726 
727 /* Neighbour state is suspicious;
728    disable the fast path.
729 
730    Called with the neigh entry write-locked.
731  */
732 static void neigh_suspect(struct neighbour *neigh)
733 {
734 	struct hh_cache *hh;
735 
736 	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
737 
738 	neigh->output = neigh->ops->output;
739 
740 	hh = neigh->hh;
741 	if (hh)
742 		hh->hh_output = neigh->ops->output;
743 }
744 
745 /* Neighbour state is OK;
746    enable the fast path.
747 
748    Called with the neigh entry write-locked.
749  */
750 static void neigh_connect(struct neighbour *neigh)
751 {
752 	struct hh_cache *hh;
753 
754 	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
755 
756 	neigh->output = neigh->ops->connected_output;
757 
758 	hh = neigh->hh;
759 	if (hh)
760 		hh->hh_output = neigh->ops->hh_output;
761 }
762 
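/*
 * Periodic garbage collection, run from a deferrable workqueue.  It drops
 * unreferenced entries that are FAILED or unused past gc_staletime,
 * re-randomizes reachable_time for every neigh_parms roughly every 300
 * seconds, and reschedules itself after base_reachable_time/2.
 */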
763 static void neigh_periodic_work(struct work_struct *work)
764 {
765 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
766 	struct neighbour *n;
767 	struct neighbour __rcu **np;
768 	unsigned int i;
769 	struct neigh_hash_table *nht;
770 
771 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
772 
773 	write_lock_bh(&tbl->lock);
774 	nht = rcu_dereference_protected(tbl->nht,
775 					lockdep_is_held(&tbl->lock));
776 
777 	/*
778 	 *	periodically recompute ReachableTime from random function
779 	 */
780 
781 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
782 		struct neigh_parms *p;
783 		tbl->last_rand = jiffies;
784 		for (p = &tbl->parms; p; p = p->next)
785 			p->reachable_time =
786 				neigh_rand_reach_time(p->base_reachable_time);
787 	}
788 
789 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
790 		np = &nht->hash_buckets[i];
791 
792 		while ((n = rcu_dereference_protected(*np,
793 				lockdep_is_held(&tbl->lock))) != NULL) {
794 			unsigned int state;
795 
796 			write_lock(&n->lock);
797 
798 			state = n->nud_state;
799 			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
800 				write_unlock(&n->lock);
801 				goto next_elt;
802 			}
803 
804 			if (time_before(n->used, n->confirmed))
805 				n->used = n->confirmed;
806 
807 			if (atomic_read(&n->refcnt) == 1 &&
808 			    (state == NUD_FAILED ||
809 			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
810 				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
811 				n->dead = 1;
812 				write_unlock(&n->lock);
813 				neigh_cleanup_and_release(n);
814 				continue;
815 			}
816 			write_unlock(&n->lock);
817 
818 next_elt:
819 			np = &n->next;
820 		}
821 		/*
822 		 * It's fine to release lock here, even if hash table
823 		 * grows while we are preempted.
824 		 */
825 		write_unlock_bh(&tbl->lock);
826 		cond_resched();
827 		write_lock_bh(&tbl->lock);
828 	}
829 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
830 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
831 	 * base_reachable_time.
832 	 */
833 	schedule_delayed_work(&tbl->gc_work,
834 			      tbl->parms.base_reachable_time >> 1);
835 	write_unlock_bh(&tbl->lock);
836 }
837 
838 static __inline__ int neigh_max_probes(struct neighbour *n)
839 {
840 	struct neigh_parms *p = n->parms;
841 	return (n->nud_state & NUD_PROBE) ?
842 		p->ucast_probes :
843 		p->ucast_probes + p->app_probes + p->mcast_probes;
844 }
845 
846 static void neigh_invalidate(struct neighbour *neigh)
847 	__releases(neigh->lock)
848 	__acquires(neigh->lock)
849 {
850 	struct sk_buff *skb;
851 
852 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
853 	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
854 	neigh->updated = jiffies;
855 
856 	/* This is a very delicate place. report_unreachable is a very
857 	   complicated routine. In particular, it can hit this same
858 	   neighbour entry!
859 	   So we try to be careful and avoid an endless loop. --ANK
860 	 */
861 	while (neigh->nud_state == NUD_FAILED &&
862 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
863 		write_unlock(&neigh->lock);
864 		neigh->ops->error_report(neigh, skb);
865 		write_lock(&neigh->lock);
866 	}
867 	skb_queue_purge(&neigh->arp_queue);
868 }
869 
870 /* Called when a timer expires for a neighbour entry. */
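/*
 * Summary of the transitions driven here:
 *   REACHABLE        -> stays REACHABLE while recently confirmed,
 *                       else DELAY (recently used) or STALE
 *   DELAY            -> REACHABLE if confirmed in the meantime, else PROBE
 *   PROBE/INCOMPLETE -> solicit again, or FAILED once neigh_max_probes()
 *                       probes have gone unanswered
 */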
871 
872 static void neigh_timer_handler(unsigned long arg)
873 {
874 	unsigned long now, next;
875 	struct neighbour *neigh = (struct neighbour *)arg;
876 	unsigned state;
877 	int notify = 0;
878 
879 	write_lock(&neigh->lock);
880 
881 	state = neigh->nud_state;
882 	now = jiffies;
883 	next = now + HZ;
884 
885 	if (!(state & NUD_IN_TIMER)) {
886 #ifndef CONFIG_SMP
887 		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
888 #endif
889 		goto out;
890 	}
891 
892 	if (state & NUD_REACHABLE) {
893 		if (time_before_eq(now,
894 				   neigh->confirmed + neigh->parms->reachable_time)) {
895 			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
896 			next = neigh->confirmed + neigh->parms->reachable_time;
897 		} else if (time_before_eq(now,
898 					  neigh->used + neigh->parms->delay_probe_time)) {
899 			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
900 			neigh->nud_state = NUD_DELAY;
901 			neigh->updated = jiffies;
902 			neigh_suspect(neigh);
903 			next = now + neigh->parms->delay_probe_time;
904 		} else {
905 			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
906 			neigh->nud_state = NUD_STALE;
907 			neigh->updated = jiffies;
908 			neigh_suspect(neigh);
909 			notify = 1;
910 		}
911 	} else if (state & NUD_DELAY) {
912 		if (time_before_eq(now,
913 				   neigh->confirmed + neigh->parms->delay_probe_time)) {
914 			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
915 			neigh->nud_state = NUD_REACHABLE;
916 			neigh->updated = jiffies;
917 			neigh_connect(neigh);
918 			notify = 1;
919 			next = neigh->confirmed + neigh->parms->reachable_time;
920 		} else {
921 			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
922 			neigh->nud_state = NUD_PROBE;
923 			neigh->updated = jiffies;
924 			atomic_set(&neigh->probes, 0);
925 			next = now + neigh->parms->retrans_time;
926 		}
927 	} else {
928 		/* NUD_PROBE|NUD_INCOMPLETE */
929 		next = now + neigh->parms->retrans_time;
930 	}
931 
932 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
933 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
934 		neigh->nud_state = NUD_FAILED;
935 		notify = 1;
936 		neigh_invalidate(neigh);
937 	}
938 
939 	if (neigh->nud_state & NUD_IN_TIMER) {
940 		if (time_before(next, jiffies + HZ/2))
941 			next = jiffies + HZ/2;
942 		if (!mod_timer(&neigh->timer, next))
943 			neigh_hold(neigh);
944 	}
945 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
946 		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
947 		/* keep skb alive even if arp_queue overflows */
948 		if (skb)
949 			skb = skb_copy(skb, GFP_ATOMIC);
950 		write_unlock(&neigh->lock);
951 		neigh->ops->solicit(neigh, skb);
952 		atomic_inc(&neigh->probes);
953 		kfree_skb(skb);
954 	} else {
955 out:
956 		write_unlock(&neigh->lock);
957 	}
958 
959 	if (notify)
960 		neigh_update_notify(neigh);
961 
962 	neigh_release(neigh);
963 }
964 
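/*
 * Kick resolution for an entry that is not yet in a usable state; usually
 * reached via the neigh_event_send() wrapper.  Returns 0 if the caller may
 * transmit now (the entry is CONNECTED, DELAY or PROBE, or was STALE and
 * has been moved to DELAY), and 1 if the skb was queued on arp_queue to
 * await resolution or was dropped because no probes are configured.
 * Takes neigh->lock itself.
 */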
965 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
966 {
967 	int rc;
968 	unsigned long now;
969 
970 	write_lock_bh(&neigh->lock);
971 
972 	rc = 0;
973 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
974 		goto out_unlock_bh;
975 
976 	now = jiffies;
977 
978 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
979 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
980 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
981 			neigh->nud_state     = NUD_INCOMPLETE;
982 			neigh->updated = jiffies;
983 			neigh_add_timer(neigh, now + 1);
984 		} else {
985 			neigh->nud_state = NUD_FAILED;
986 			neigh->updated = jiffies;
987 			write_unlock_bh(&neigh->lock);
988 
989 			kfree_skb(skb);
990 			return 1;
991 		}
992 	} else if (neigh->nud_state & NUD_STALE) {
993 		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
994 		neigh->nud_state = NUD_DELAY;
995 		neigh->updated = jiffies;
996 		neigh_add_timer(neigh,
997 				jiffies + neigh->parms->delay_probe_time);
998 	}
999 
1000 	if (neigh->nud_state == NUD_INCOMPLETE) {
1001 		if (skb) {
1002 			if (skb_queue_len(&neigh->arp_queue) >=
1003 			    neigh->parms->queue_len) {
1004 				struct sk_buff *buff;
1005 				buff = __skb_dequeue(&neigh->arp_queue);
1006 				kfree_skb(buff);
1007 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1008 			}
1009 			skb_dst_force(skb);
1010 			__skb_queue_tail(&neigh->arp_queue, skb);
1011 		}
1012 		rc = 1;
1013 	}
1014 out_unlock_bh:
1015 	write_unlock_bh(&neigh->lock);
1016 	return rc;
1017 }
1018 EXPORT_SYMBOL(__neigh_event_send);
1019 
1020 static void neigh_update_hhs(const struct neighbour *neigh)
1021 {
1022 	struct hh_cache *hh;
1023 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1024 		= NULL;
1025 
1026 	if (neigh->dev->header_ops)
1027 		update = neigh->dev->header_ops->cache_update;
1028 
1029 	if (update) {
1030 		hh = neigh->hh;
1031 		if (hh) {
1032 			write_seqlock_bh(&hh->hh_lock);
1033 			update(hh, neigh->dev, neigh->ha);
1034 			write_sequnlock_bh(&hh->hh_lock);
1035 		}
1036 	}
1037 }
1038 
1039 
1040 
1041 /* Generic update routine.
1042    -- lladdr is the new lladdr, or NULL if none is supplied.
1043    -- new    is the new state.
1044    -- flags
1045 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1046 				if it is different.
1047 	NEIGH_UPDATE_F_WEAK_OVERRIDE will mark an existing "connected"
1048 				lladdr as suspect instead of overriding it
1049 				if it is different.
1050 				It also allows retaining the current state
1051 				if the lladdr is unchanged.
1052 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1053 
1054 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1055 				NTF_ROUTER flag.
1056 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
1057 				a router.
1058 
1059    The caller MUST hold a reference on the entry.
1060  */
1061 
1062 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1063 		 u32 flags)
1064 {
1065 	u8 old;
1066 	int err;
1067 	int notify = 0;
1068 	struct net_device *dev;
1069 	int update_isrouter = 0;
1070 
1071 	write_lock_bh(&neigh->lock);
1072 
1073 	dev    = neigh->dev;
1074 	old    = neigh->nud_state;
1075 	err    = -EPERM;
1076 
1077 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1078 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1079 		goto out;
1080 
1081 	if (!(new & NUD_VALID)) {
1082 		neigh_del_timer(neigh);
1083 		if (old & NUD_CONNECTED)
1084 			neigh_suspect(neigh);
1085 		neigh->nud_state = new;
1086 		err = 0;
1087 		notify = old & NUD_VALID;
1088 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1089 		    (new & NUD_FAILED)) {
1090 			neigh_invalidate(neigh);
1091 			notify = 1;
1092 		}
1093 		goto out;
1094 	}
1095 
1096 	/* Compare new lladdr with cached one */
1097 	if (!dev->addr_len) {
1098 		/* First case: device needs no address. */
1099 		lladdr = neigh->ha;
1100 	} else if (lladdr) {
1101 		/* The second case: if something is already cached
1102 		   and a new address is proposed:
1103 		   - compare new & old
1104 		   - if they are different, check override flag
1105 		 */
1106 		if ((old & NUD_VALID) &&
1107 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1108 			lladdr = neigh->ha;
1109 	} else {
1110 		/* No address is supplied; if we know something,
1111 		   use it, otherwise discard the request.
1112 		 */
1113 		err = -EINVAL;
1114 		if (!(old & NUD_VALID))
1115 			goto out;
1116 		lladdr = neigh->ha;
1117 	}
1118 
1119 	if (new & NUD_CONNECTED)
1120 		neigh->confirmed = jiffies;
1121 	neigh->updated = jiffies;
1122 
1123 	/* If the entry was valid and the address has not changed,
1124 	   do not change the entry state if the new one is STALE.
1125 	 */
1126 	err = 0;
1127 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1128 	if (old & NUD_VALID) {
1129 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1130 			update_isrouter = 0;
1131 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1132 			    (old & NUD_CONNECTED)) {
1133 				lladdr = neigh->ha;
1134 				new = NUD_STALE;
1135 			} else
1136 				goto out;
1137 		} else {
1138 			if (lladdr == neigh->ha && new == NUD_STALE &&
1139 			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1140 			     (old & NUD_CONNECTED))
1141 			    )
1142 				new = old;
1143 		}
1144 	}
1145 
1146 	if (new != old) {
1147 		neigh_del_timer(neigh);
1148 		if (new & NUD_IN_TIMER)
1149 			neigh_add_timer(neigh, (jiffies +
1150 						((new & NUD_REACHABLE) ?
1151 						 neigh->parms->reachable_time :
1152 						 0)));
1153 		neigh->nud_state = new;
1154 	}
1155 
1156 	if (lladdr != neigh->ha) {
1157 		write_seqlock(&neigh->ha_lock);
1158 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1159 		write_sequnlock(&neigh->ha_lock);
1160 		neigh_update_hhs(neigh);
1161 		if (!(new & NUD_CONNECTED))
1162 			neigh->confirmed = jiffies -
1163 				      (neigh->parms->base_reachable_time << 1);
1164 		notify = 1;
1165 	}
1166 	if (new == old)
1167 		goto out;
1168 	if (new & NUD_CONNECTED)
1169 		neigh_connect(neigh);
1170 	else
1171 		neigh_suspect(neigh);
1172 	if (!(old & NUD_VALID)) {
1173 		struct sk_buff *skb;
1174 
1175 		/* Again: avoid an endless loop if something went wrong */
1176 
1177 		while (neigh->nud_state & NUD_VALID &&
1178 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1179 			struct neighbour *n1 = neigh;
1180 			write_unlock_bh(&neigh->lock);
1181 			/* On shaper/eql skb->dst->neighbour != neigh :( */
1182 			if (skb_dst(skb) && skb_dst(skb)->neighbour)
1183 				n1 = skb_dst(skb)->neighbour;
1184 			n1->output(skb);
1185 			write_lock_bh(&neigh->lock);
1186 		}
1187 		skb_queue_purge(&neigh->arp_queue);
1188 	}
1189 out:
1190 	if (update_isrouter) {
1191 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1192 			(neigh->flags | NTF_ROUTER) :
1193 			(neigh->flags & ~NTF_ROUTER);
1194 	}
1195 	write_unlock_bh(&neigh->lock);
1196 
1197 	if (notify)
1198 		neigh_update_notify(neigh);
1199 
1200 	return err;
1201 }
1202 EXPORT_SYMBOL(neigh_update);
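/*
 * Example: a sketch of how a protocol confirms a resolved address (the
 * reply-handling context is assumed, not taken from this file):
 *
 *	... a resolution reply for neigh arrived carrying lladdr ...
 *	neigh_update(neigh, lladdr, NUD_REACHABLE, NEIGH_UPDATE_F_OVERRIDE);
 *
 * This overwrites a differing cached address, connects the entry, and, if
 * it was not yet valid, flushes packets parked on its arp_queue through
 * their output handler.
 */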
1203 
1204 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1205 				 u8 *lladdr, void *saddr,
1206 				 struct net_device *dev)
1207 {
1208 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1209 						 lladdr || !dev->addr_len);
1210 	if (neigh)
1211 		neigh_update(neigh, lladdr, NUD_STALE,
1212 			     NEIGH_UPDATE_F_OVERRIDE);
1213 	return neigh;
1214 }
1215 EXPORT_SYMBOL(neigh_event_ns);
1216 
1217 static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst)
1218 {
1219 	struct hh_cache *hh;
1220 
1221 	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
1222 	hh = n->hh;
1223 	if (hh) {
1224 		atomic_inc(&hh->hh_refcnt);
1225 		if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1226 			hh_cache_put(hh);
1227 		return true;
1228 	}
1229 	return false;
1230 }
1231 
1232 /* Slow path of hh cache setup; takes n->lock itself, so the caller must not hold it. */
1233 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1234 			  __be16 protocol)
1235 {
1236 	struct hh_cache	*hh;
1237 	struct net_device *dev = dst->dev;
1238 
1239 	if (likely(neigh_hh_lookup(n, dst)))
1240 		return;
1241 
1242 	/* slow path */
1243 	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
1244 	if (!hh)
1245 		return;
1246 
1247 	seqlock_init(&hh->hh_lock);
1248 	atomic_set(&hh->hh_refcnt, 2);
1249 
1250 	if (dev->header_ops->cache(n, hh, protocol)) {
1251 		kfree(hh);
1252 		return;
1253 	}
1254 
1255 	write_lock_bh(&n->lock);
1256 
1257 	/* must check if another thread already did the insert */
1258 	if (neigh_hh_lookup(n, dst)) {
1259 		kfree(hh);
1260 		goto end;
1261 	}
1262 
1263 	if (n->nud_state & NUD_CONNECTED)
1264 		hh->hh_output = n->ops->hh_output;
1265 	else
1266 		hh->hh_output = n->ops->output;
1267 
1268 	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
1269 	n->hh	    = hh;
1270 
1271 	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1272 		hh_cache_put(hh);
1273 end:
1274 	write_unlock_bh(&n->lock);
1275 }
1276 
1277 /* This function can be used in contexts where only the old dev_queue_xmit
1278  * worked, e.g. if you want to override the normal output path (eql, shaper)
1279  * but resolution has not been made yet.
1280  */
1281 
1282 int neigh_compat_output(struct sk_buff *skb)
1283 {
1284 	struct net_device *dev = skb->dev;
1285 
1286 	__skb_pull(skb, skb_network_offset(skb));
1287 
1288 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1289 			    skb->len) < 0 &&
1290 	    dev->header_ops->rebuild(skb))
1291 		return 0;
1292 
1293 	return dev_queue_xmit(skb);
1294 }
1295 EXPORT_SYMBOL(neigh_compat_output);
1296 
1297 /* Slow and careful. */
1298 
1299 int neigh_resolve_output(struct sk_buff *skb)
1300 {
1301 	struct dst_entry *dst = skb_dst(skb);
1302 	struct neighbour *neigh;
1303 	int rc = 0;
1304 
1305 	if (!dst || !(neigh = dst->neighbour))
1306 		goto discard;
1307 
1308 	__skb_pull(skb, skb_network_offset(skb));
1309 
1310 	if (!neigh_event_send(neigh, skb)) {
1311 		int err;
1312 		struct net_device *dev = neigh->dev;
1313 		unsigned int seq;
1314 
1315 		if (dev->header_ops->cache &&
1316 		    !dst->hh &&
1317 		    !(dst->flags & DST_NOCACHE))
1318 			neigh_hh_init(neigh, dst, dst->ops->protocol);
1319 
1320 		do {
1321 			seq = read_seqbegin(&neigh->ha_lock);
1322 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1323 					      neigh->ha, NULL, skb->len);
1324 		} while (read_seqretry(&neigh->ha_lock, seq));
1325 
1326 		if (err >= 0)
1327 			rc = neigh->ops->queue_xmit(skb);
1328 		else
1329 			goto out_kfree_skb;
1330 	}
1331 out:
1332 	return rc;
1333 discard:
1334 	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1335 		      dst, dst ? dst->neighbour : NULL);
1336 out_kfree_skb:
1337 	rc = -EINVAL;
1338 	kfree_skb(skb);
1339 	goto out;
1340 }
1341 EXPORT_SYMBOL(neigh_resolve_output);
1342 
1343 /* As fast as possible without hh cache */
1344 
1345 int neigh_connected_output(struct sk_buff *skb)
1346 {
1347 	int err;
1348 	struct dst_entry *dst = skb_dst(skb);
1349 	struct neighbour *neigh = dst->neighbour;
1350 	struct net_device *dev = neigh->dev;
1351 	unsigned int seq;
1352 
1353 	__skb_pull(skb, skb_network_offset(skb));
1354 
1355 	do {
1356 		seq = read_seqbegin(&neigh->ha_lock);
1357 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1358 				      neigh->ha, NULL, skb->len);
1359 	} while (read_seqretry(&neigh->ha_lock, seq));
1360 
1361 	if (err >= 0)
1362 		err = neigh->ops->queue_xmit(skb);
1363 	else {
1364 		err = -EINVAL;
1365 		kfree_skb(skb);
1366 	}
1367 	return err;
1368 }
1369 EXPORT_SYMBOL(neigh_connected_output);
1370 
1371 static void neigh_proxy_process(unsigned long arg)
1372 {
1373 	struct neigh_table *tbl = (struct neigh_table *)arg;
1374 	long sched_next = 0;
1375 	unsigned long now = jiffies;
1376 	struct sk_buff *skb, *n;
1377 
1378 	spin_lock(&tbl->proxy_queue.lock);
1379 
1380 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1381 		long tdif = NEIGH_CB(skb)->sched_next - now;
1382 
1383 		if (tdif <= 0) {
1384 			struct net_device *dev = skb->dev;
1385 			__skb_unlink(skb, &tbl->proxy_queue);
1386 			if (tbl->proxy_redo && netif_running(dev))
1387 				tbl->proxy_redo(skb);
1388 			else
1389 				kfree_skb(skb);
1390 
1391 			dev_put(dev);
1392 		} else if (!sched_next || tdif < sched_next)
1393 			sched_next = tdif;
1394 	}
1395 	del_timer(&tbl->proxy_timer);
1396 	if (sched_next)
1397 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1398 	spin_unlock(&tbl->proxy_queue.lock);
1399 }
1400 
1401 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1402 		    struct sk_buff *skb)
1403 {
1404 	unsigned long now = jiffies;
1405 	unsigned long sched_next = now + (net_random() % p->proxy_delay);
1406 
1407 	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1408 		kfree_skb(skb);
1409 		return;
1410 	}
1411 
1412 	NEIGH_CB(skb)->sched_next = sched_next;
1413 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1414 
1415 	spin_lock(&tbl->proxy_queue.lock);
1416 	if (del_timer(&tbl->proxy_timer)) {
1417 		if (time_before(tbl->proxy_timer.expires, sched_next))
1418 			sched_next = tbl->proxy_timer.expires;
1419 	}
1420 	skb_dst_drop(skb);
1421 	dev_hold(skb->dev);
1422 	__skb_queue_tail(&tbl->proxy_queue, skb);
1423 	mod_timer(&tbl->proxy_timer, sched_next);
1424 	spin_unlock(&tbl->proxy_queue.lock);
1425 }
1426 EXPORT_SYMBOL(pneigh_enqueue);
1427 
1428 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1429 						      struct net *net, int ifindex)
1430 {
1431 	struct neigh_parms *p;
1432 
1433 	for (p = &tbl->parms; p; p = p->next) {
1434 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1435 		    (!p->dev && !ifindex))
1436 			return p;
1437 	}
1438 
1439 	return NULL;
1440 }
1441 
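/*
 * Clone the table's default parameters for @dev.  The clone inherits the
 * defaults current at creation time, the device's ndo_neigh_setup() gets a
 * chance to adjust or veto it, and the result is linked after tbl->parms
 * under tbl->lock.  Returns NULL on failure.
 */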
1442 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1443 				      struct neigh_table *tbl)
1444 {
1445 	struct neigh_parms *p, *ref;
1446 	struct net *net = dev_net(dev);
1447 	const struct net_device_ops *ops = dev->netdev_ops;
1448 
1449 	ref = lookup_neigh_parms(tbl, net, 0);
1450 	if (!ref)
1451 		return NULL;
1452 
1453 	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1454 	if (p) {
1455 		p->tbl		  = tbl;
1456 		atomic_set(&p->refcnt, 1);
1457 		p->reachable_time =
1458 				neigh_rand_reach_time(p->base_reachable_time);
1459 
1460 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1461 			kfree(p);
1462 			return NULL;
1463 		}
1464 
1465 		dev_hold(dev);
1466 		p->dev = dev;
1467 		write_pnet(&p->net, hold_net(net));
1468 		p->sysctl_table = NULL;
1469 		write_lock_bh(&tbl->lock);
1470 		p->next		= tbl->parms.next;
1471 		tbl->parms.next = p;
1472 		write_unlock_bh(&tbl->lock);
1473 	}
1474 	return p;
1475 }
1476 EXPORT_SYMBOL(neigh_parms_alloc);
1477 
1478 static void neigh_rcu_free_parms(struct rcu_head *head)
1479 {
1480 	struct neigh_parms *parms =
1481 		container_of(head, struct neigh_parms, rcu_head);
1482 
1483 	neigh_parms_put(parms);
1484 }
1485 
1486 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1487 {
1488 	struct neigh_parms **p;
1489 
1490 	if (!parms || parms == &tbl->parms)
1491 		return;
1492 	write_lock_bh(&tbl->lock);
1493 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1494 		if (*p == parms) {
1495 			*p = parms->next;
1496 			parms->dead = 1;
1497 			write_unlock_bh(&tbl->lock);
1498 			if (parms->dev)
1499 				dev_put(parms->dev);
1500 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1501 			return;
1502 		}
1503 	}
1504 	write_unlock_bh(&tbl->lock);
1505 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
1506 }
1507 EXPORT_SYMBOL(neigh_parms_release);
1508 
1509 static void neigh_parms_destroy(struct neigh_parms *parms)
1510 {
1511 	release_net(neigh_parms_net(parms));
1512 	kfree(parms);
1513 }
1514 
1515 static struct lock_class_key neigh_table_proxy_queue_class;
1516 
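/*
 * One-time initialisation of a protocol's neighbour table: the slab cache,
 * per-CPU statistics, /proc/net/stat entry, an initial 8-bucket (shift 3)
 * hash table and the proxy queue/timer.  Panics on allocation failure,
 * since this runs during protocol initialisation.
 */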
1517 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1518 {
1519 	unsigned long now = jiffies;
1520 	unsigned long phsize;
1521 
1522 	write_pnet(&tbl->parms.net, &init_net);
1523 	atomic_set(&tbl->parms.refcnt, 1);
1524 	tbl->parms.reachable_time =
1525 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
1526 
1527 	if (!tbl->kmem_cachep)
1528 		tbl->kmem_cachep =
1529 			kmem_cache_create(tbl->id, tbl->entry_size, 0,
1530 					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1531 					  NULL);
1532 	tbl->stats = alloc_percpu(struct neigh_statistics);
1533 	if (!tbl->stats)
1534 		panic("cannot create neighbour cache statistics");
1535 
1536 #ifdef CONFIG_PROC_FS
1537 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1538 			      &neigh_stat_seq_fops, tbl))
1539 		panic("cannot create neighbour proc dir entry");
1540 #endif
1541 
1542 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1543 
1544 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1545 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1546 
1547 	if (!tbl->nht || !tbl->phash_buckets)
1548 		panic("cannot allocate neighbour cache hashes");
1549 
1550 	rwlock_init(&tbl->lock);
1551 	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1552 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1553 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1554 	skb_queue_head_init_class(&tbl->proxy_queue,
1555 			&neigh_table_proxy_queue_class);
1556 
1557 	tbl->last_flush = now;
1558 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1559 }
1560 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1561 
1562 void neigh_table_init(struct neigh_table *tbl)
1563 {
1564 	struct neigh_table *tmp;
1565 
1566 	neigh_table_init_no_netlink(tbl);
1567 	write_lock(&neigh_tbl_lock);
1568 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1569 		if (tmp->family == tbl->family)
1570 			break;
1571 	}
1572 	tbl->next	= neigh_tables;
1573 	neigh_tables	= tbl;
1574 	write_unlock(&neigh_tbl_lock);
1575 
1576 	if (unlikely(tmp)) {
1577 		printk(KERN_ERR "NEIGH: Registering multiple tables for "
1578 		       "family %d\n", tbl->family);
1579 		dump_stack();
1580 	}
1581 }
1582 EXPORT_SYMBOL(neigh_table_init);
1583 
1584 int neigh_table_clear(struct neigh_table *tbl)
1585 {
1586 	struct neigh_table **tp;
1587 
1588 	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
1589 	cancel_delayed_work_sync(&tbl->gc_work);
1590 	del_timer_sync(&tbl->proxy_timer);
1591 	pneigh_queue_purge(&tbl->proxy_queue);
1592 	neigh_ifdown(tbl, NULL);
1593 	if (atomic_read(&tbl->entries))
1594 		printk(KERN_CRIT "neighbour leakage\n");
1595 	write_lock(&neigh_tbl_lock);
1596 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1597 		if (*tp == tbl) {
1598 			*tp = tbl->next;
1599 			break;
1600 		}
1601 	}
1602 	write_unlock(&neigh_tbl_lock);
1603 
1604 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1605 		 neigh_hash_free_rcu);
1606 	tbl->nht = NULL;
1607 
1608 	kfree(tbl->phash_buckets);
1609 	tbl->phash_buckets = NULL;
1610 
1611 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1612 
1613 	free_percpu(tbl->stats);
1614 	tbl->stats = NULL;
1615 
1616 	kmem_cache_destroy(tbl->kmem_cachep);
1617 	tbl->kmem_cachep = NULL;
1618 
1619 	return 0;
1620 }
1621 EXPORT_SYMBOL(neigh_table_clear);
1622 
1623 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1624 {
1625 	struct net *net = sock_net(skb->sk);
1626 	struct ndmsg *ndm;
1627 	struct nlattr *dst_attr;
1628 	struct neigh_table *tbl;
1629 	struct net_device *dev = NULL;
1630 	int err = -EINVAL;
1631 
1632 	ASSERT_RTNL();
1633 	if (nlmsg_len(nlh) < sizeof(*ndm))
1634 		goto out;
1635 
1636 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1637 	if (dst_attr == NULL)
1638 		goto out;
1639 
1640 	ndm = nlmsg_data(nlh);
1641 	if (ndm->ndm_ifindex) {
1642 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1643 		if (dev == NULL) {
1644 			err = -ENODEV;
1645 			goto out;
1646 		}
1647 	}
1648 
1649 	read_lock(&neigh_tbl_lock);
1650 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1651 		struct neighbour *neigh;
1652 
1653 		if (tbl->family != ndm->ndm_family)
1654 			continue;
1655 		read_unlock(&neigh_tbl_lock);
1656 
1657 		if (nla_len(dst_attr) < tbl->key_len)
1658 			goto out;
1659 
1660 		if (ndm->ndm_flags & NTF_PROXY) {
1661 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1662 			goto out;
1663 		}
1664 
1665 		if (dev == NULL)
1666 			goto out;
1667 
1668 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1669 		if (neigh == NULL) {
1670 			err = -ENOENT;
1671 			goto out;
1672 		}
1673 
1674 		err = neigh_update(neigh, NULL, NUD_FAILED,
1675 				   NEIGH_UPDATE_F_OVERRIDE |
1676 				   NEIGH_UPDATE_F_ADMIN);
1677 		neigh_release(neigh);
1678 		goto out;
1679 	}
1680 	read_unlock(&neigh_tbl_lock);
1681 	err = -EAFNOSUPPORT;
1682 
1683 out:
1684 	return err;
1685 }
1686 
1687 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1688 {
1689 	struct net *net = sock_net(skb->sk);
1690 	struct ndmsg *ndm;
1691 	struct nlattr *tb[NDA_MAX+1];
1692 	struct neigh_table *tbl;
1693 	struct net_device *dev = NULL;
1694 	int err;
1695 
1696 	ASSERT_RTNL();
1697 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1698 	if (err < 0)
1699 		goto out;
1700 
1701 	err = -EINVAL;
1702 	if (tb[NDA_DST] == NULL)
1703 		goto out;
1704 
1705 	ndm = nlmsg_data(nlh);
1706 	if (ndm->ndm_ifindex) {
1707 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1708 		if (dev == NULL) {
1709 			err = -ENODEV;
1710 			goto out;
1711 		}
1712 
1713 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1714 			goto out;
1715 	}
1716 
1717 	read_lock(&neigh_tbl_lock);
1718 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1719 		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1720 		struct neighbour *neigh;
1721 		void *dst, *lladdr;
1722 
1723 		if (tbl->family != ndm->ndm_family)
1724 			continue;
1725 		read_unlock(&neigh_tbl_lock);
1726 
1727 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1728 			goto out;
1729 		dst = nla_data(tb[NDA_DST]);
1730 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1731 
1732 		if (ndm->ndm_flags & NTF_PROXY) {
1733 			struct pneigh_entry *pn;
1734 
1735 			err = -ENOBUFS;
1736 			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1737 			if (pn) {
1738 				pn->flags = ndm->ndm_flags;
1739 				err = 0;
1740 			}
1741 			goto out;
1742 		}
1743 
1744 		if (dev == NULL)
1745 			goto out;
1746 
1747 		neigh = neigh_lookup(tbl, dst, dev);
1748 		if (neigh == NULL) {
1749 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1750 				err = -ENOENT;
1751 				goto out;
1752 			}
1753 
1754 			neigh = __neigh_lookup_errno(tbl, dst, dev);
1755 			if (IS_ERR(neigh)) {
1756 				err = PTR_ERR(neigh);
1757 				goto out;
1758 			}
1759 		} else {
1760 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1761 				err = -EEXIST;
1762 				neigh_release(neigh);
1763 				goto out;
1764 			}
1765 
1766 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1767 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1768 		}
1769 
1770 		if (ndm->ndm_flags & NTF_USE) {
1771 			neigh_event_send(neigh, NULL);
1772 			err = 0;
1773 		} else
1774 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1775 		neigh_release(neigh);
1776 		goto out;
1777 	}
1778 
1779 	read_unlock(&neigh_tbl_lock);
1780 	err = -EAFNOSUPPORT;
1781 out:
1782 	return err;
1783 }
1784 
1785 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1786 {
1787 	struct nlattr *nest;
1788 
1789 	nest = nla_nest_start(skb, NDTA_PARMS);
1790 	if (nest == NULL)
1791 		return -ENOBUFS;
1792 
1793 	if (parms->dev)
1794 		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1795 
1796 	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1797 	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1798 	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1799 	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1800 	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1801 	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1802 	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1803 	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1804 		      parms->base_reachable_time);
1805 	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1806 	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1807 	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1808 	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1809 	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1810 	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1811 
1812 	return nla_nest_end(skb, nest);
1813 
1814 nla_put_failure:
1815 	nla_nest_cancel(skb, nest);
1816 	return -EMSGSIZE;
1817 }
1818 
1819 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1820 			      u32 pid, u32 seq, int type, int flags)
1821 {
1822 	struct nlmsghdr *nlh;
1823 	struct ndtmsg *ndtmsg;
1824 
1825 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1826 	if (nlh == NULL)
1827 		return -EMSGSIZE;
1828 
1829 	ndtmsg = nlmsg_data(nlh);
1830 
1831 	read_lock_bh(&tbl->lock);
1832 	ndtmsg->ndtm_family = tbl->family;
1833 	ndtmsg->ndtm_pad1   = 0;
1834 	ndtmsg->ndtm_pad2   = 0;
1835 
1836 	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1837 	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1838 	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1839 	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1840 	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1841 
1842 	{
1843 		unsigned long now = jiffies;
1844 		unsigned int flush_delta = now - tbl->last_flush;
1845 		unsigned int rand_delta = now - tbl->last_rand;
1846 		struct neigh_hash_table *nht;
1847 		struct ndt_config ndc = {
1848 			.ndtc_key_len		= tbl->key_len,
1849 			.ndtc_entry_size	= tbl->entry_size,
1850 			.ndtc_entries		= atomic_read(&tbl->entries),
1851 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1852 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1853 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1854 		};
1855 
1856 		rcu_read_lock_bh();
1857 		nht = rcu_dereference_bh(tbl->nht);
1858 		ndc.ndtc_hash_rnd = nht->hash_rnd;
1859 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1860 		rcu_read_unlock_bh();
1861 
1862 		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1863 	}
1864 
1865 	{
1866 		int cpu;
1867 		struct ndt_stats ndst;
1868 
1869 		memset(&ndst, 0, sizeof(ndst));
1870 
1871 		for_each_possible_cpu(cpu) {
1872 			struct neigh_statistics	*st;
1873 
1874 			st = per_cpu_ptr(tbl->stats, cpu);
1875 			ndst.ndts_allocs		+= st->allocs;
1876 			ndst.ndts_destroys		+= st->destroys;
1877 			ndst.ndts_hash_grows		+= st->hash_grows;
1878 			ndst.ndts_res_failed		+= st->res_failed;
1879 			ndst.ndts_lookups		+= st->lookups;
1880 			ndst.ndts_hits			+= st->hits;
1881 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1882 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1883 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1884 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1885 		}
1886 
1887 		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1888 	}
1889 
1890 	BUG_ON(tbl->parms.dev);
1891 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1892 		goto nla_put_failure;
1893 
1894 	read_unlock_bh(&tbl->lock);
1895 	return nlmsg_end(skb, nlh);
1896 
1897 nla_put_failure:
1898 	read_unlock_bh(&tbl->lock);
1899 	nlmsg_cancel(skb, nlh);
1900 	return -EMSGSIZE;
1901 }
1902 
1903 static int neightbl_fill_param_info(struct sk_buff *skb,
1904 				    struct neigh_table *tbl,
1905 				    struct neigh_parms *parms,
1906 				    u32 pid, u32 seq, int type,
1907 				    unsigned int flags)
1908 {
1909 	struct ndtmsg *ndtmsg;
1910 	struct nlmsghdr *nlh;
1911 
1912 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1913 	if (nlh == NULL)
1914 		return -EMSGSIZE;
1915 
1916 	ndtmsg = nlmsg_data(nlh);
1917 
1918 	read_lock_bh(&tbl->lock);
1919 	ndtmsg->ndtm_family = tbl->family;
1920 	ndtmsg->ndtm_pad1   = 0;
1921 	ndtmsg->ndtm_pad2   = 0;
1922 
1923 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1924 	    neightbl_fill_parms(skb, parms) < 0)
1925 		goto errout;
1926 
1927 	read_unlock_bh(&tbl->lock);
1928 	return nlmsg_end(skb, nlh);
1929 errout:
1930 	read_unlock_bh(&tbl->lock);
1931 	nlmsg_cancel(skb, nlh);
1932 	return -EMSGSIZE;
1933 }
1934 
1935 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1936 	[NDTA_NAME]		= { .type = NLA_STRING },
1937 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1938 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1939 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1940 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1941 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1942 };
1943 
1944 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1945 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1946 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1947 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1948 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1949 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1950 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1951 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1952 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1953 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1954 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1955 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1956 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1957 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1958 };
1959 
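/*
 * RTM_SETNEIGHTBL handler.  The target table is looked up by NDTA_NAME
 * (optionally narrowed by ndtm_family); the parms set is then resolved
 * via NDTPA_IFINDEX (0 means the table default).  All updates happen
 * under write_lock_bh(&tbl->lock) so the periodic timers never observe
 * a half-written configuration.
 */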
1960 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1961 {
1962 	struct net *net = sock_net(skb->sk);
1963 	struct neigh_table *tbl;
1964 	struct ndtmsg *ndtmsg;
1965 	struct nlattr *tb[NDTA_MAX+1];
1966 	int err;
1967 
1968 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1969 			  nl_neightbl_policy);
1970 	if (err < 0)
1971 		goto errout;
1972 
1973 	if (tb[NDTA_NAME] == NULL) {
1974 		err = -EINVAL;
1975 		goto errout;
1976 	}
1977 
1978 	ndtmsg = nlmsg_data(nlh);
1979 	read_lock(&neigh_tbl_lock);
1980 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1981 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1982 			continue;
1983 
1984 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1985 			break;
1986 	}
1987 
1988 	if (tbl == NULL) {
1989 		err = -ENOENT;
1990 		goto errout_locked;
1991 	}
1992 
1993 	/*
1994 	 * We acquire tbl->lock to be nice to the periodic timers and
1995 	 * make sure they always see a consistent set of values.
1996 	 */
1997 	write_lock_bh(&tbl->lock);
1998 
1999 	if (tb[NDTA_PARMS]) {
2000 		struct nlattr *tbp[NDTPA_MAX+1];
2001 		struct neigh_parms *p;
2002 		int i, ifindex = 0;
2003 
2004 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2005 				       nl_ntbl_parm_policy);
2006 		if (err < 0)
2007 			goto errout_tbl_lock;
2008 
2009 		if (tbp[NDTPA_IFINDEX])
2010 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2011 
2012 		p = lookup_neigh_parms(tbl, net, ifindex);
2013 		if (p == NULL) {
2014 			err = -ENOENT;
2015 			goto errout_tbl_lock;
2016 		}
2017 
2018 		for (i = 1; i <= NDTPA_MAX; i++) {
2019 			if (tbp[i] == NULL)
2020 				continue;
2021 
2022 			switch (i) {
2023 			case NDTPA_QUEUE_LEN:
2024 				p->queue_len = nla_get_u32(tbp[i]);
2025 				break;
2026 			case NDTPA_PROXY_QLEN:
2027 				p->proxy_qlen = nla_get_u32(tbp[i]);
2028 				break;
2029 			case NDTPA_APP_PROBES:
2030 				p->app_probes = nla_get_u32(tbp[i]);
2031 				break;
2032 			case NDTPA_UCAST_PROBES:
2033 				p->ucast_probes = nla_get_u32(tbp[i]);
2034 				break;
2035 			case NDTPA_MCAST_PROBES:
2036 				p->mcast_probes = nla_get_u32(tbp[i]);
2037 				break;
2038 			case NDTPA_BASE_REACHABLE_TIME:
2039 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2040 				break;
2041 			case NDTPA_GC_STALETIME:
2042 				p->gc_staletime = nla_get_msecs(tbp[i]);
2043 				break;
2044 			case NDTPA_DELAY_PROBE_TIME:
2045 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2046 				break;
2047 			case NDTPA_RETRANS_TIME:
2048 				p->retrans_time = nla_get_msecs(tbp[i]);
2049 				break;
2050 			case NDTPA_ANYCAST_DELAY:
2051 				p->anycast_delay = nla_get_msecs(tbp[i]);
2052 				break;
2053 			case NDTPA_PROXY_DELAY:
2054 				p->proxy_delay = nla_get_msecs(tbp[i]);
2055 				break;
2056 			case NDTPA_LOCKTIME:
2057 				p->locktime = nla_get_msecs(tbp[i]);
2058 				break;
2059 			}
2060 		}
2061 	}
2062 
2063 	if (tb[NDTA_THRESH1])
2064 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2065 
2066 	if (tb[NDTA_THRESH2])
2067 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2068 
2069 	if (tb[NDTA_THRESH3])
2070 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2071 
2072 	if (tb[NDTA_GC_INTERVAL])
2073 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2074 
2075 	err = 0;
2076 
2077 errout_tbl_lock:
2078 	write_unlock_bh(&tbl->lock);
2079 errout_locked:
2080 	read_unlock(&neigh_tbl_lock);
2081 errout:
2082 	return err;
2083 }
2084 
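/*
 * RTM_GETNEIGHTBL dump.  cb->args[0] holds the table index and
 * cb->args[1] the per-device parms index to resume from, so a dump that
 * ran out of skb space continues exactly where it stopped.
 */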
2085 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2086 {
2087 	struct net *net = sock_net(skb->sk);
2088 	int family, tidx, nidx = 0;
2089 	int tbl_skip = cb->args[0];
2090 	int neigh_skip = cb->args[1];
2091 	struct neigh_table *tbl;
2092 
2093 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2094 
2095 	read_lock(&neigh_tbl_lock);
2096 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2097 		struct neigh_parms *p;
2098 
2099 		if (tidx < tbl_skip || (family && tbl->family != family))
2100 			continue;
2101 
2102 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2103 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2104 				       NLM_F_MULTI) <= 0)
2105 			break;
2106 
2107 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2108 			if (!net_eq(neigh_parms_net(p), net))
2109 				continue;
2110 
2111 			if (nidx < neigh_skip)
2112 				goto next;
2113 
2114 			if (neightbl_fill_param_info(skb, tbl, p,
2115 						     NETLINK_CB(cb->skb).pid,
2116 						     cb->nlh->nlmsg_seq,
2117 						     RTM_NEWNEIGHTBL,
2118 						     NLM_F_MULTI) <= 0)
2119 				goto out;
2120 		next:
2121 			nidx++;
2122 		}
2123 
2124 		neigh_skip = 0;
2125 	}
2126 out:
2127 	read_unlock(&neigh_tbl_lock);
2128 	cb->args[0] = tidx;
2129 	cb->args[1] = nidx;
2130 
2131 	return skb->len;
2132 }
2133 
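/*
 * Fill one RTM_NEWNEIGH message for @neigh: the ndmsg header, the
 * NDA_DST key, NDA_LLADDR (only while the entry is NUD_VALID), the
 * cache timestamps and the probe count.  neigh->lock is held just long
 * enough to snapshot the volatile state.
 */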
2134 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2135 			   u32 pid, u32 seq, int type, unsigned int flags)
2136 {
2137 	unsigned long now = jiffies;
2138 	struct nda_cacheinfo ci;
2139 	struct nlmsghdr *nlh;
2140 	struct ndmsg *ndm;
2141 
2142 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2143 	if (nlh == NULL)
2144 		return -EMSGSIZE;
2145 
2146 	ndm = nlmsg_data(nlh);
2147 	ndm->ndm_family	 = neigh->ops->family;
2148 	ndm->ndm_pad1    = 0;
2149 	ndm->ndm_pad2    = 0;
2150 	ndm->ndm_flags	 = neigh->flags;
2151 	ndm->ndm_type	 = neigh->type;
2152 	ndm->ndm_ifindex = neigh->dev->ifindex;
2153 
2154 	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2155 
2156 	read_lock_bh(&neigh->lock);
2157 	ndm->ndm_state	 = neigh->nud_state;
2158 	if (neigh->nud_state & NUD_VALID) {
2159 		char haddr[MAX_ADDR_LEN];
2160 
2161 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2162 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2163 			read_unlock_bh(&neigh->lock);
2164 			goto nla_put_failure;
2165 		}
2166 	}
2167 
2168 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2169 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2170 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2171 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2172 	read_unlock_bh(&neigh->lock);
2173 
2174 	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2175 	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2176 
2177 	return nlmsg_end(skb, nlh);
2178 
2179 nla_put_failure:
2180 	nlmsg_cancel(skb, nlh);
2181 	return -EMSGSIZE;
2182 }
2183 
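/*
 * Fan an update out to both the in-kernel netevent chain and the
 * rtnetlink listeners.
 */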
2184 static void neigh_update_notify(struct neighbour *neigh)
2185 {
2186 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2187 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2188 }
2189 
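/*
 * Dump every entry of one table that belongs to the requesting netns.
 * The walk holds only rcu_read_lock_bh(); cb->args[1]/[2] record the
 * hash bucket and the index within it for resumption.  Because the
 * table may be resized between two callback invocations, a resumed
 * dump can in principle skip or repeat entries.
 */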
2190 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2191 			    struct netlink_callback *cb)
2192 {
2193 	struct net *net = sock_net(skb->sk);
2194 	struct neighbour *n;
2195 	int rc, h, s_h = cb->args[1];
2196 	int idx, s_idx = idx = cb->args[2];
2197 	struct neigh_hash_table *nht;
2198 
2199 	rcu_read_lock_bh();
2200 	nht = rcu_dereference_bh(tbl->nht);
2201 
2202 	for (h = 0; h < (1 << nht->hash_shift); h++) {
2203 		if (h < s_h)
2204 			continue;
2205 		if (h > s_h)
2206 			s_idx = 0;
2207 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2208 		     n != NULL;
2209 		     n = rcu_dereference_bh(n->next)) {
2210 			if (!net_eq(dev_net(n->dev), net))
2211 				continue;
2212 			if (idx < s_idx)
2213 				goto next;
2214 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2215 					    cb->nlh->nlmsg_seq,
2216 					    RTM_NEWNEIGH,
2217 					    NLM_F_MULTI) <= 0) {
2218 				rc = -1;
2219 				goto out;
2220 			}
2221 next:
2222 			idx++;
2223 		}
2224 	}
2225 	rc = skb->len;
2226 out:
2227 	rcu_read_unlock_bh();
2228 	cb->args[1] = h;
2229 	cb->args[2] = idx;
2230 	return rc;
2231 }
2232 
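/*
 * RTM_GETNEIGH dump entry point: walk all registered tables, optionally
 * filtered by rtgen_family, resuming from the table index saved in
 * cb->args[0].
 */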
2233 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2234 {
2235 	struct neigh_table *tbl;
2236 	int t, family, s_t;
2237 
2238 	read_lock(&neigh_tbl_lock);
2239 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2240 	s_t = cb->args[0];
2241 
2242 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2243 		if (t < s_t || (family && tbl->family != family))
2244 			continue;
2245 		if (t > s_t)
2246 			memset(&cb->args[1], 0, sizeof(cb->args) -
2247 						sizeof(cb->args[0]));
2248 		if (neigh_dump_table(tbl, skb, cb) < 0)
2249 			break;
2250 	}
2251 	read_unlock(&neigh_tbl_lock);
2252 
2253 	cb->args[0] = t;
2254 	return skb->len;
2255 }
2256 
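/*
 * neigh_for_each - run @cb for every entry in @tbl.
 *
 * Runs under rcu_read_lock_bh() plus a read_lock on tbl->lock (the
 * latter only to keep the hash table from being resized mid-walk), so
 * @cb must not sleep.  A minimal, hypothetical caller counting the
 * entries of the ARP table might look like:
 *
 *	static void count_one(struct neighbour *n, void *cookie)
 *	{
 *		(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, count_one, &count);
 */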
2257 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2258 {
2259 	int chain;
2260 	struct neigh_hash_table *nht;
2261 
2262 	rcu_read_lock_bh();
2263 	nht = rcu_dereference_bh(tbl->nht);
2264 
2265 	read_lock(&tbl->lock); /* avoid resizes */
2266 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2267 		struct neighbour *n;
2268 
2269 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2270 		     n != NULL;
2271 		     n = rcu_dereference_bh(n->next))
2272 			cb(n, cookie);
2273 	}
2274 	read_unlock(&tbl->lock);
2275 	rcu_read_unlock_bh();
2276 }
2277 EXPORT_SYMBOL(neigh_for_each);
2278 
2279 /* The tbl->lock must be held as a writer and BH disabled. */
2280 void __neigh_for_each_release(struct neigh_table *tbl,
2281 			      int (*cb)(struct neighbour *))
2282 {
2283 	int chain;
2284 	struct neigh_hash_table *nht;
2285 
2286 	nht = rcu_dereference_protected(tbl->nht,
2287 					lockdep_is_held(&tbl->lock));
2288 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2289 		struct neighbour *n;
2290 		struct neighbour __rcu **np;
2291 
2292 		np = &nht->hash_buckets[chain];
2293 		while ((n = rcu_dereference_protected(*np,
2294 					lockdep_is_held(&tbl->lock))) != NULL) {
2295 			int release;
2296 
2297 			write_lock(&n->lock);
2298 			release = cb(n);
2299 			if (release) {
2300 				rcu_assign_pointer(*np,
2301 					rcu_dereference_protected(n->next,
2302 						lockdep_is_held(&tbl->lock)));
2303 				n->dead = 1;
2304 			} else
2305 				np = &n->next;
2306 			write_unlock(&n->lock);
2307 			if (release)
2308 				neigh_cleanup_and_release(n);
2309 		}
2310 	}
2311 }
2312 EXPORT_SYMBOL(__neigh_for_each_release);
2313 
2314 #ifdef CONFIG_PROC_FS
2315 
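/*
 * seq_file iteration helpers backing the per-protocol /proc tables
 * (e.g. /proc/net/arp).  neigh_get_first/_next walk the hash table,
 * honouring NEIGH_SEQ_SKIP_NOARP and the optional neigh_sub_iter hook;
 * the pneigh_* variants walk the proxy hash.  All of them run between
 * neigh_seq_start() and neigh_seq_stop(), i.e. under
 * rcu_read_lock_bh().
 */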
2316 static struct neighbour *neigh_get_first(struct seq_file *seq)
2317 {
2318 	struct neigh_seq_state *state = seq->private;
2319 	struct net *net = seq_file_net(seq);
2320 	struct neigh_hash_table *nht = state->nht;
2321 	struct neighbour *n = NULL;
2322 	int bucket = state->bucket;
2323 
2324 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2325 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2326 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2327 
2328 		while (n) {
2329 			if (!net_eq(dev_net(n->dev), net))
2330 				goto next;
2331 			if (state->neigh_sub_iter) {
2332 				loff_t fakep = 0;
2333 				void *v;
2334 
2335 				v = state->neigh_sub_iter(state, n, &fakep);
2336 				if (!v)
2337 					goto next;
2338 			}
2339 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2340 				break;
2341 			if (n->nud_state & ~NUD_NOARP)
2342 				break;
2343 next:
2344 			n = rcu_dereference_bh(n->next);
2345 		}
2346 
2347 		if (n)
2348 			break;
2349 	}
2350 	state->bucket = bucket;
2351 
2352 	return n;
2353 }
2354 
2355 static struct neighbour *neigh_get_next(struct seq_file *seq,
2356 					struct neighbour *n,
2357 					loff_t *pos)
2358 {
2359 	struct neigh_seq_state *state = seq->private;
2360 	struct net *net = seq_file_net(seq);
2361 	struct neigh_hash_table *nht = state->nht;
2362 
2363 	if (state->neigh_sub_iter) {
2364 		void *v = state->neigh_sub_iter(state, n, pos);
2365 		if (v)
2366 			return n;
2367 	}
2368 	n = rcu_dereference_bh(n->next);
2369 
2370 	while (1) {
2371 		while (n) {
2372 			if (!net_eq(dev_net(n->dev), net))
2373 				goto next;
2374 			if (state->neigh_sub_iter) {
2375 				void *v = state->neigh_sub_iter(state, n, pos);
2376 				if (v)
2377 					return n;
2378 				goto next;
2379 			}
2380 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2381 				break;
2382 
2383 			if (n->nud_state & ~NUD_NOARP)
2384 				break;
2385 next:
2386 			n = rcu_dereference_bh(n->next);
2387 		}
2388 
2389 		if (n)
2390 			break;
2391 
2392 		if (++state->bucket >= (1 << nht->hash_shift))
2393 			break;
2394 
2395 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2396 	}
2397 
2398 	if (n && pos)
2399 		--(*pos);
2400 	return n;
2401 }
2402 
2403 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2404 {
2405 	struct neighbour *n = neigh_get_first(seq);
2406 
2407 	if (n) {
2408 		--(*pos);
2409 		while (*pos) {
2410 			n = neigh_get_next(seq, n, pos);
2411 			if (!n)
2412 				break;
2413 		}
2414 	}
2415 	return *pos ? NULL : n;
2416 }
2417 
2418 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2419 {
2420 	struct neigh_seq_state *state = seq->private;
2421 	struct net *net = seq_file_net(seq);
2422 	struct neigh_table *tbl = state->tbl;
2423 	struct pneigh_entry *pn = NULL;
2424 	int bucket = state->bucket;
2425 
2426 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2427 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2428 		pn = tbl->phash_buckets[bucket];
2429 		while (pn && !net_eq(pneigh_net(pn), net))
2430 			pn = pn->next;
2431 		if (pn)
2432 			break;
2433 	}
2434 	state->bucket = bucket;
2435 
2436 	return pn;
2437 }
2438 
2439 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2440 					    struct pneigh_entry *pn,
2441 					    loff_t *pos)
2442 {
2443 	struct neigh_seq_state *state = seq->private;
2444 	struct net *net = seq_file_net(seq);
2445 	struct neigh_table *tbl = state->tbl;
2446 
2447 	pn = pn->next;
2448 	while (!pn) {
2449 		if (++state->bucket > PNEIGH_HASHMASK)
2450 			break;
2451 		pn = tbl->phash_buckets[state->bucket];
2452 		while (pn && !net_eq(pneigh_net(pn), net))
2453 			pn = pn->next;
2454 		if (pn)
2455 			break;
2456 	}
2457 
2458 	if (pn && pos)
2459 		--(*pos);
2460 
2461 	return pn;
2462 }
2463 
2464 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2465 {
2466 	struct pneigh_entry *pn = pneigh_get_first(seq);
2467 
2468 	if (pn) {
2469 		--(*pos);
2470 		while (*pos) {
2471 			pn = pneigh_get_next(seq, pn, pos);
2472 			if (!pn)
2473 				break;
2474 		}
2475 	}
2476 	return *pos ? NULL : pn;
2477 }
2478 
2479 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2480 {
2481 	struct neigh_seq_state *state = seq->private;
2482 	void *rc;
2483 	loff_t idxpos = *pos;
2484 
2485 	rc = neigh_get_idx(seq, &idxpos);
2486 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2487 		rc = pneigh_get_idx(seq, &idxpos);
2488 
2489 	return rc;
2490 }
2491 
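/*
 * Protocols plug these three functions into their own seq_operations;
 * for example, arp_seq_start() in net/ipv4/arp.c is essentially:
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 */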
2492 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2493 	__acquires(rcu_bh)
2494 {
2495 	struct neigh_seq_state *state = seq->private;
2496 
2497 	state->tbl = tbl;
2498 	state->bucket = 0;
2499 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2500 
2501 	rcu_read_lock_bh();
2502 	state->nht = rcu_dereference_bh(tbl->nht);
2503 
2504 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2505 }
2506 EXPORT_SYMBOL(neigh_seq_start);
2507 
2508 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2509 {
2510 	struct neigh_seq_state *state;
2511 	void *rc;
2512 
2513 	if (v == SEQ_START_TOKEN) {
2514 		rc = neigh_get_first(seq);
2515 		goto out;
2516 	}
2517 
2518 	state = seq->private;
2519 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2520 		rc = neigh_get_next(seq, v, NULL);
2521 		if (rc)
2522 			goto out;
2523 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2524 			rc = pneigh_get_first(seq);
2525 	} else {
2526 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2527 		rc = pneigh_get_next(seq, v, NULL);
2528 	}
2529 out:
2530 	++(*pos);
2531 	return rc;
2532 }
2533 EXPORT_SYMBOL(neigh_seq_next);
2534 
2535 void neigh_seq_stop(struct seq_file *seq, void *v)
2536 	__releases(rcu_bh)
2537 {
2538 	rcu_read_unlock_bh();
2539 }
2540 EXPORT_SYMBOL(neigh_seq_stop);
2541 
2542 /* statistics via seq_file */
2543 
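/*
 * Per-cpu statistics iterator: *pos == 0 yields the header token,
 * *pos == n the stats of the first possible CPU numbered n - 1 or
 * higher, with *pos always left one past the CPU just returned.
 */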
2544 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2545 {
2546 	struct neigh_table *tbl = seq->private;
2547 	int cpu;
2548 
2549 	if (*pos == 0)
2550 		return SEQ_START_TOKEN;
2551 
2552 	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
2553 		if (!cpu_possible(cpu))
2554 			continue;
2555 		*pos = cpu + 1;
2556 		return per_cpu_ptr(tbl->stats, cpu);
2557 	}
2558 	return NULL;
2559 }
2560 
2561 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2562 {
2563 	struct neigh_table *tbl = seq->private;
2564 	int cpu;
2565 
2566 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2567 		if (!cpu_possible(cpu))
2568 			continue;
2569 		*pos = cpu + 1;
2570 		return per_cpu_ptr(tbl->stats, cpu);
2571 	}
2572 	return NULL;
2573 }
2574 
2575 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2576 {
2577 
2578 }
2579 
2580 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2581 {
2582 	struct neigh_table *tbl = seq->private;
2583 	struct neigh_statistics *st = v;
2584 
2585 	if (v == SEQ_START_TOKEN) {
2586 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2587 		return 0;
2588 	}
2589 
2590 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2591 			"%08lx %08lx  %08lx %08lx %08lx\n",
2592 		   atomic_read(&tbl->entries),
2593 
2594 		   st->allocs,
2595 		   st->destroys,
2596 		   st->hash_grows,
2597 
2598 		   st->lookups,
2599 		   st->hits,
2600 
2601 		   st->res_failed,
2602 
2603 		   st->rcv_probes_mcast,
2604 		   st->rcv_probes_ucast,
2605 
2606 		   st->periodic_gc_runs,
2607 		   st->forced_gc_runs,
2608 		   st->unres_discards
2609 		   );
2610 
2611 	return 0;
2612 }
2613 
2614 static const struct seq_operations neigh_stat_seq_ops = {
2615 	.start	= neigh_stat_seq_start,
2616 	.next	= neigh_stat_seq_next,
2617 	.stop	= neigh_stat_seq_stop,
2618 	.show	= neigh_stat_seq_show,
2619 };
2620 
2621 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2622 {
2623 	int ret = seq_open(file, &neigh_stat_seq_ops);
2624 
2625 	if (!ret) {
2626 		struct seq_file *sf = file->private_data;
2627 		sf->private = PDE(inode)->data;
2628 	}
2629 	return ret;
2630 }
2631 
2632 static const struct file_operations neigh_stat_seq_fops = {
2633 	.owner	 = THIS_MODULE,
2634 	.open 	 = neigh_stat_seq_open,
2635 	.read	 = seq_read,
2636 	.llseek	 = seq_lseek,
2637 	.release = seq_release,
2638 };
2639 
2640 #endif /* CONFIG_PROC_FS */
2641 
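/*
 * Worst-case payload of a single neighbour message, used to size
 * notification skbs.  MAX_ADDR_LEN over-reserves for NDA_DST and
 * NDA_LLADDR instead of tracking per-table key and address lengths.
 */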
2642 static inline size_t neigh_nlmsg_size(void)
2643 {
2644 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2645 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2646 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2647 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2648 	       + nla_total_size(4); /* NDA_PROBES */
2649 }
2650 
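/*
 * Build one neighbour message and multicast it to RTNLGRP_NEIGH.
 * Callers may be in softirq context, hence GFP_ATOMIC; on failure the
 * error is recorded against the group's sockets via rtnl_set_sk_err()
 * so listeners can notice the lost event.
 */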
2651 static void __neigh_notify(struct neighbour *n, int type, int flags)
2652 {
2653 	struct net *net = dev_net(n->dev);
2654 	struct sk_buff *skb;
2655 	int err = -ENOBUFS;
2656 
2657 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2658 	if (skb == NULL)
2659 		goto errout;
2660 
2661 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2662 	if (err < 0) {
2663 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2664 		WARN_ON(err == -EMSGSIZE);
2665 		kfree_skb(skb);
2666 		goto errout;
2667 	}
2668 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2669 	return;
2670 errout:
2671 	if (err < 0)
2672 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2673 }
2674 
2675 #ifdef CONFIG_ARPD
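/*
 * With CONFIG_ARPD, resolution can be delegated to a user-space daemon:
 * an RTM_GETNEIGH request is multicast to RTNLGRP_NEIGH, and the daemon
 * is expected to answer with an RTM_NEWNEIGH update.
 */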
2676 void neigh_app_ns(struct neighbour *n)
2677 {
2678 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2679 }
2680 EXPORT_SYMBOL(neigh_app_ns);
2681 #endif /* CONFIG_ARPD */
2682 
2683 #ifdef CONFIG_SYSCTL
2684 
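/*
 * sysctl glue.  The template below is kmemdup()ed for every
 * registration; slot order is significant because
 * neigh_sysctl_register() wires up the .data pointers by array index
 * (0 == mcast_solicit ... 17 == gc_thresh3, plus a terminating empty
 * entry: NEIGH_VARS_MAX entries in total).
 */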
2685 #define NEIGH_VARS_MAX 19
2686 
2687 static struct neigh_sysctl_table {
2688 	struct ctl_table_header *sysctl_header;
2689 	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2690 	char *dev_name;
2691 } neigh_sysctl_template __read_mostly = {
2692 	.neigh_vars = {
2693 		{
2694 			.procname	= "mcast_solicit",
2695 			.maxlen		= sizeof(int),
2696 			.mode		= 0644,
2697 			.proc_handler	= proc_dointvec,
2698 		},
2699 		{
2700 			.procname	= "ucast_solicit",
2701 			.maxlen		= sizeof(int),
2702 			.mode		= 0644,
2703 			.proc_handler	= proc_dointvec,
2704 		},
2705 		{
2706 			.procname	= "app_solicit",
2707 			.maxlen		= sizeof(int),
2708 			.mode		= 0644,
2709 			.proc_handler	= proc_dointvec,
2710 		},
2711 		{
2712 			.procname	= "retrans_time",
2713 			.maxlen		= sizeof(int),
2714 			.mode		= 0644,
2715 			.proc_handler	= proc_dointvec_userhz_jiffies,
2716 		},
2717 		{
2718 			.procname	= "base_reachable_time",
2719 			.maxlen		= sizeof(int),
2720 			.mode		= 0644,
2721 			.proc_handler	= proc_dointvec_jiffies,
2722 		},
2723 		{
2724 			.procname	= "delay_first_probe_time",
2725 			.maxlen		= sizeof(int),
2726 			.mode		= 0644,
2727 			.proc_handler	= proc_dointvec_jiffies,
2728 		},
2729 		{
2730 			.procname	= "gc_stale_time",
2731 			.maxlen		= sizeof(int),
2732 			.mode		= 0644,
2733 			.proc_handler	= proc_dointvec_jiffies,
2734 		},
2735 		{
2736 			.procname	= "unres_qlen",
2737 			.maxlen		= sizeof(int),
2738 			.mode		= 0644,
2739 			.proc_handler	= proc_dointvec,
2740 		},
2741 		{
2742 			.procname	= "proxy_qlen",
2743 			.maxlen		= sizeof(int),
2744 			.mode		= 0644,
2745 			.proc_handler	= proc_dointvec,
2746 		},
2747 		{
2748 			.procname	= "anycast_delay",
2749 			.maxlen		= sizeof(int),
2750 			.mode		= 0644,
2751 			.proc_handler	= proc_dointvec_userhz_jiffies,
2752 		},
2753 		{
2754 			.procname	= "proxy_delay",
2755 			.maxlen		= sizeof(int),
2756 			.mode		= 0644,
2757 			.proc_handler	= proc_dointvec_userhz_jiffies,
2758 		},
2759 		{
2760 			.procname	= "locktime",
2761 			.maxlen		= sizeof(int),
2762 			.mode		= 0644,
2763 			.proc_handler	= proc_dointvec_userhz_jiffies,
2764 		},
2765 		{
2766 			.procname	= "retrans_time_ms",
2767 			.maxlen		= sizeof(int),
2768 			.mode		= 0644,
2769 			.proc_handler	= proc_dointvec_ms_jiffies,
2770 		},
2771 		{
2772 			.procname	= "base_reachable_time_ms",
2773 			.maxlen		= sizeof(int),
2774 			.mode		= 0644,
2775 			.proc_handler	= proc_dointvec_ms_jiffies,
2776 		},
2777 		{
2778 			.procname	= "gc_interval",
2779 			.maxlen		= sizeof(int),
2780 			.mode		= 0644,
2781 			.proc_handler	= proc_dointvec_jiffies,
2782 		},
2783 		{
2784 			.procname	= "gc_thresh1",
2785 			.maxlen		= sizeof(int),
2786 			.mode		= 0644,
2787 			.proc_handler	= proc_dointvec,
2788 		},
2789 		{
2790 			.procname	= "gc_thresh2",
2791 			.maxlen		= sizeof(int),
2792 			.mode		= 0644,
2793 			.proc_handler	= proc_dointvec,
2794 		},
2795 		{
2796 			.procname	= "gc_thresh3",
2797 			.maxlen		= sizeof(int),
2798 			.mode		= 0644,
2799 			.proc_handler	= proc_dointvec,
2800 		},
2801 		{},
2802 	},
2803 };
2804 
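/*
 * Register net/<proto>/neigh/<dev>/ (or .../neigh/default/) sysctls for
 * @p.  For the default parms, slots 14-17 (gc_interval and the three
 * gc_thresh values) are wired to (int *)(p + 1): this relies on those
 * fields immediately following the embedded neigh_parms inside struct
 * neigh_table.  Per-device registrations terminate the table before
 * those slots instead, since the gc knobs are table-wide.
 */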
2805 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2806 			  char *p_name, proc_handler *handler)
2807 {
2808 	struct neigh_sysctl_table *t;
2809 	const char *dev_name_source = NULL;
2810 
2811 #define NEIGH_CTL_PATH_ROOT	0
2812 #define NEIGH_CTL_PATH_PROTO	1
2813 #define NEIGH_CTL_PATH_NEIGH	2
2814 #define NEIGH_CTL_PATH_DEV	3
2815 
2816 	struct ctl_path neigh_path[] = {
2817 		{ .procname = "net",	 },
2818 		{ .procname = "proto",	 },
2819 		{ .procname = "neigh",	 },
2820 		{ .procname = "default", },
2821 		{ },
2822 	};
2823 
2824 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2825 	if (!t)
2826 		goto err;
2827 
2828 	t->neigh_vars[0].data  = &p->mcast_probes;
2829 	t->neigh_vars[1].data  = &p->ucast_probes;
2830 	t->neigh_vars[2].data  = &p->app_probes;
2831 	t->neigh_vars[3].data  = &p->retrans_time;
2832 	t->neigh_vars[4].data  = &p->base_reachable_time;
2833 	t->neigh_vars[5].data  = &p->delay_probe_time;
2834 	t->neigh_vars[6].data  = &p->gc_staletime;
2835 	t->neigh_vars[7].data  = &p->queue_len;
2836 	t->neigh_vars[8].data  = &p->proxy_qlen;
2837 	t->neigh_vars[9].data  = &p->anycast_delay;
2838 	t->neigh_vars[10].data = &p->proxy_delay;
2839 	t->neigh_vars[11].data = &p->locktime;
2840 	t->neigh_vars[12].data = &p->retrans_time;
2841 	t->neigh_vars[13].data = &p->base_reachable_time;
2842 
2843 	if (dev) {
2844 		dev_name_source = dev->name;
2845 		/* Terminate the table early */
2846 		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2847 	} else {
2848 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2849 		t->neigh_vars[14].data = (int *)(p + 1);
2850 		t->neigh_vars[15].data = (int *)(p + 1) + 1;
2851 		t->neigh_vars[16].data = (int *)(p + 1) + 2;
2852 		t->neigh_vars[17].data = (int *)(p + 1) + 3;
2853 	}
2854 
2856 	if (handler) {
2857 		/* RetransTime */
2858 		t->neigh_vars[3].proc_handler = handler;
2859 		t->neigh_vars[3].extra1 = dev;
2860 		/* ReachableTime */
2861 		t->neigh_vars[4].proc_handler = handler;
2862 		t->neigh_vars[4].extra1 = dev;
2863 		/* RetransTime (in milliseconds)*/
2864 		t->neigh_vars[12].proc_handler = handler;
2865 		t->neigh_vars[12].extra1 = dev;
2866 		/* ReachableTime (in milliseconds) */
2867 		t->neigh_vars[13].proc_handler = handler;
2868 		t->neigh_vars[13].extra1 = dev;
2869 	}
2870 
2871 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2872 	if (!t->dev_name)
2873 		goto free;
2874 
2875 	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2876 	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2877 
2878 	t->sysctl_header =
2879 		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2880 	if (!t->sysctl_header)
2881 		goto free_procname;
2882 
2883 	p->sysctl_table = t;
2884 	return 0;
2885 
2886 free_procname:
2887 	kfree(t->dev_name);
2888 free:
2889 	kfree(t);
2890 err:
2891 	return -ENOBUFS;
2892 }
2893 EXPORT_SYMBOL(neigh_sysctl_register);
2894 
2895 void neigh_sysctl_unregister(struct neigh_parms *p)
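/*
 * Tear down everything neigh_sysctl_register() set up; a NULL
 * p->sysctl_table makes this a no-op.
 */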
2896 {
2897 	if (p->sysctl_table) {
2898 		struct neigh_sysctl_table *t = p->sysctl_table;
2899 		p->sysctl_table = NULL;
2900 		unregister_sysctl_table(t->sysctl_header);
2901 		kfree(t->dev_name);
2902 		kfree(t);
2903 	}
2904 }
2905 EXPORT_SYMBOL(neigh_sysctl_unregister);
2906 
2907 #endif	/* CONFIG_SYSCTL */
2908 
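/*
 * Hook the neighbour message types into rtnetlink.  RTM_GETNEIGH and
 * RTM_GETNEIGHTBL are registered with dump callbacks only.  This runs
 * at subsys_initcall time, ahead of the protocol initcalls that create
 * the tables.
 */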
2909 static int __init neigh_init(void)
2910 {
2911 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
2912 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
2913 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
2914 
2915 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
2916 		      NULL);
2917 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
2918 
2919 	return 0;
2920 }
2921 
2922 subsys_initcall(neigh_init);
2923 
2924