1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43 
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)		\
47 do {						\
48 	if (level <= NEIGH_DEBUG)		\
49 		pr_debug(fmt, ##__VA_ARGS__);	\
50 } while (0)
51 
52 #define PNEIGH_HASHMASK		0xF
53 
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 			   u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59 				    struct net_device *dev);
60 
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64 
65 /*
66    Neighbour hash table buckets are protected with rwlock tbl->lock.
67 
68    - All the scans/updates to hash buckets MUST be made under this lock.
69    - NOTHING clever should be done under this lock: no callbacks
70      to protocol backends, no attempts to send anything to the network.
71      Doing so will result in deadlocks if the backend/driver wants to
72      use the neighbour cache.
73    - If the entry requires some non-trivial actions, increase
74      its reference count and release the table lock.
75 
76    Neighbour entries are protected:
77    - with reference count.
78    - with rwlock neigh->lock
79 
80    The reference count prevents destruction.
81 
82    neigh->lock mainly serializes ll address data and its validity state.
83    However, the same lock is used to protect other entry fields:
84     - timer
85     - resolution queue
86 
87    Again, nothing clever shall be done under neigh->lock;
88    the most complicated procedure we allow is dev->hard_header.
89    It is assumed that dev->hard_header is simple and does
90    not call back into the neighbour tables.
91  */
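
/*
 * A minimal sketch of the intended pattern (illustrative only):
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...look up the entry under the lock...;
 *	if (n)
 *		neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		...non-trivial work: may call drivers or transmit...
 *		neigh_release(n);
 *	}
 */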
92 
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
94 {
95 	kfree_skb(skb);
96 	return -ENETDOWN;
97 }
98 
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 {
101 	if (neigh->parms->neigh_cleanup)
102 		neigh->parms->neigh_cleanup(neigh);
103 
104 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
105 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
106 	neigh_release(neigh);
107 }
108 
109 /*
110  * Returns a random value in the interval (1/2)*base ... (3/2)*base.
111  * It corresponds to the default IPv6 settings and is not overridable,
112  * because it is a really reasonable choice.
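 *
 * For example, with base = 30 * HZ the result is uniformly
 * distributed over [15 * HZ, 45 * HZ).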
113  */
114 
115 unsigned long neigh_rand_reach_time(unsigned long base)
116 {
117 	return base ? (prandom_u32() % base) + (base >> 1) : 0;
118 }
119 EXPORT_SYMBOL(neigh_rand_reach_time);
120 
121 
122 static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
123 		      struct neighbour __rcu **np, struct neigh_table *tbl)
124 {
125 	bool retval = false;
126 
127 	write_lock(&n->lock);
128 	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
129 	    !(n->flags & flags)) {
130 		struct neighbour *neigh;
131 
132 		neigh = rcu_dereference_protected(n->next,
133 						  lockdep_is_held(&tbl->lock));
134 		rcu_assign_pointer(*np, neigh);
135 		n->dead = 1;
136 		retval = true;
137 	}
138 	write_unlock(&n->lock);
139 	if (retval)
140 		neigh_cleanup_and_release(n);
141 	return retval;
142 }
143 
144 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
145 {
146 	struct neigh_hash_table *nht;
147 	void *pkey = ndel->primary_key;
148 	u32 hash_val;
149 	struct neighbour *n;
150 	struct neighbour __rcu **np;
151 
152 	nht = rcu_dereference_protected(tbl->nht,
153 					lockdep_is_held(&tbl->lock));
154 	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
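	/* Use the top hash_shift bits of the 32-bit hash as the bucket index. */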
155 	hash_val = hash_val >> (32 - nht->hash_shift);
156 
157 	np = &nht->hash_buckets[hash_val];
158 	while ((n = rcu_dereference_protected(*np,
159 					      lockdep_is_held(&tbl->lock)))) {
160 		if (n == ndel)
161 			return neigh_del(n, 0, 0, np, tbl);
162 		np = &n->next;
163 	}
164 	return false;
165 }
166 
167 static int neigh_forced_gc(struct neigh_table *tbl)
168 {
169 	int shrunk = 0;
170 	int i;
171 	struct neigh_hash_table *nht;
172 
173 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
174 
175 	write_lock_bh(&tbl->lock);
176 	nht = rcu_dereference_protected(tbl->nht,
177 					lockdep_is_held(&tbl->lock));
178 	for (i = 0; i < (1 << nht->hash_shift); i++) {
179 		struct neighbour *n;
180 		struct neighbour __rcu **np;
181 
182 		np = &nht->hash_buckets[i];
183 		while ((n = rcu_dereference_protected(*np,
184 					lockdep_is_held(&tbl->lock))) != NULL) {
185 			/* Neighbour record may be discarded if:
186 			 * - nobody refers to it.
187 			 * - it is neither permanent nor externally learned
188 			 */
189 			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
190 				      tbl)) {
191 				shrunk = 1;
192 				continue;
193 			}
194 			np = &n->next;
195 		}
196 	}
197 
198 	tbl->last_flush = jiffies;
199 
200 	write_unlock_bh(&tbl->lock);
201 
202 	return shrunk;
203 }
204 
205 static void neigh_add_timer(struct neighbour *n, unsigned long when)
206 {
207 	neigh_hold(n);
208 	if (unlikely(mod_timer(&n->timer, when))) {
209 		pr_warn("NEIGH: BUG, double timer add, state is %x\n",
210 			n->nud_state);
211 		dump_stack();
212 	}
213 }
214 
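/*
 * A pending timer holds a reference on the entry (taken in
 * neigh_add_timer()); deleting the timer must drop that reference.
 */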
215 static int neigh_del_timer(struct neighbour *n)
216 {
217 	if ((n->nud_state & NUD_IN_TIMER) &&
218 	    del_timer(&n->timer)) {
219 		neigh_release(n);
220 		return 1;
221 	}
222 	return 0;
223 }
224 
225 static void pneigh_queue_purge(struct sk_buff_head *list)
226 {
227 	struct sk_buff *skb;
228 
229 	while ((skb = skb_dequeue(list)) != NULL) {
230 		dev_put(skb->dev);
231 		kfree_skb(skb);
232 	}
233 }
234 
235 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
236 			    bool skip_perm)
237 {
238 	int i;
239 	struct neigh_hash_table *nht;
240 
241 	nht = rcu_dereference_protected(tbl->nht,
242 					lockdep_is_held(&tbl->lock));
243 
244 	for (i = 0; i < (1 << nht->hash_shift); i++) {
245 		struct neighbour *n;
246 		struct neighbour __rcu **np = &nht->hash_buckets[i];
247 
248 		while ((n = rcu_dereference_protected(*np,
249 					lockdep_is_held(&tbl->lock))) != NULL) {
250 			if (dev && n->dev != dev) {
251 				np = &n->next;
252 				continue;
253 			}
254 			if (skip_perm && n->nud_state & NUD_PERMANENT) {
255 				np = &n->next;
256 				continue;
257 			}
258 			rcu_assign_pointer(*np,
259 				   rcu_dereference_protected(n->next,
260 						lockdep_is_held(&tbl->lock)));
261 			write_lock(&n->lock);
262 			neigh_del_timer(n);
263 			n->dead = 1;
264 
265 			if (refcount_read(&n->refcnt) != 1) {
266 				/* The most unpleasant situation:
267 				   we must destroy the neighbour entry,
268 				   but someone still holds a reference
269 				   to it.
270 				   Destruction is delayed until the
271 				   last user releases the entry, but
272 				   we must kill the timers etc. and
273 				   move it to a safe state now.
274 				 */
275 				__skb_queue_purge(&n->arp_queue);
276 				n->arp_queue_len_bytes = 0;
277 				n->output = neigh_blackhole;
278 				if (n->nud_state & NUD_VALID)
279 					n->nud_state = NUD_NOARP;
280 				else
281 					n->nud_state = NUD_NONE;
282 				neigh_dbg(2, "neigh %p is stray\n", n);
283 			}
284 			write_unlock(&n->lock);
285 			neigh_cleanup_and_release(n);
286 		}
287 	}
288 }
289 
290 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
291 {
292 	write_lock_bh(&tbl->lock);
293 	neigh_flush_dev(tbl, dev, false);
294 	write_unlock_bh(&tbl->lock);
295 }
296 EXPORT_SYMBOL(neigh_changeaddr);
297 
298 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
299 			  bool skip_perm)
300 {
301 	write_lock_bh(&tbl->lock);
302 	neigh_flush_dev(tbl, dev, skip_perm);
303 	pneigh_ifdown_and_unlock(tbl, dev);
304 
305 	del_timer_sync(&tbl->proxy_timer);
306 	pneigh_queue_purge(&tbl->proxy_queue);
307 	return 0;
308 }
309 
310 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
311 {
312 	__neigh_ifdown(tbl, dev, true);
313 	return 0;
314 }
315 EXPORT_SYMBOL(neigh_carrier_down);
316 
317 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
318 {
319 	__neigh_ifdown(tbl, dev, false);
320 	return 0;
321 }
322 EXPORT_SYMBOL(neigh_ifdown);
323 
324 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
325 {
326 	struct neighbour *n = NULL;
327 	unsigned long now = jiffies;
328 	int entries;
329 
330 	entries = atomic_inc_return(&tbl->entries) - 1;
331 	if (entries >= tbl->gc_thresh3 ||
332 	    (entries >= tbl->gc_thresh2 &&
333 	     time_after(now, tbl->last_flush + 5 * HZ))) {
334 		if (!neigh_forced_gc(tbl) &&
335 		    entries >= tbl->gc_thresh3) {
336 			net_info_ratelimited("%s: neighbor table overflow!\n",
337 					     tbl->id);
338 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
339 			goto out_entries;
340 		}
341 	}
342 
343 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
344 	if (!n)
345 		goto out_entries;
346 
347 	__skb_queue_head_init(&n->arp_queue);
348 	rwlock_init(&n->lock);
349 	seqlock_init(&n->ha_lock);
350 	n->updated	  = n->used = now;
351 	n->nud_state	  = NUD_NONE;
352 	n->output	  = neigh_blackhole;
353 	seqlock_init(&n->hh.hh_lock);
354 	n->parms	  = neigh_parms_clone(&tbl->parms);
355 	timer_setup(&n->timer, neigh_timer_handler, 0);
356 
357 	NEIGH_CACHE_STAT_INC(tbl, allocs);
358 	n->tbl		  = tbl;
359 	refcount_set(&n->refcnt, 1);
360 	n->dead		  = 1;
361 out:
362 	return n;
363 
364 out_entries:
365 	atomic_dec(&tbl->entries);
366 	goto out;
367 }
368 
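/* Force the low bit so hash_rnd is odd and never zero. */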
369 static void neigh_get_hash_rnd(u32 *x)
370 {
371 	*x = get_random_u32() | 1;
372 }
373 
374 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
375 {
376 	size_t size = (1 << shift) * sizeof(struct neighbour *);
377 	struct neigh_hash_table *ret;
378 	struct neighbour __rcu **buckets;
379 	int i;
380 
381 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
382 	if (!ret)
383 		return NULL;
384 	if (size <= PAGE_SIZE)
385 		buckets = kzalloc(size, GFP_ATOMIC);
386 	else
387 		buckets = (struct neighbour __rcu **)
388 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
389 					   get_order(size));
390 	if (!buckets) {
391 		kfree(ret);
392 		return NULL;
393 	}
394 	ret->hash_buckets = buckets;
395 	ret->hash_shift = shift;
396 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
397 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
398 	return ret;
399 }
400 
401 static void neigh_hash_free_rcu(struct rcu_head *head)
402 {
403 	struct neigh_hash_table *nht = container_of(head,
404 						    struct neigh_hash_table,
405 						    rcu);
406 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
407 	struct neighbour __rcu **buckets = nht->hash_buckets;
408 
409 	if (size <= PAGE_SIZE)
410 		kfree(buckets);
411 	else
412 		free_pages((unsigned long)buckets, get_order(size));
413 	kfree(nht);
414 }
415 
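/*
 * Rehash every entry into a new table of 2^new_shift buckets.  Runs
 * with tbl->lock held for writing; lookups are lockless under RCU, so
 * entries are republished with rcu_assign_pointer() and the old table
 * is only freed after a grace period.
 */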
416 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
417 						unsigned long new_shift)
418 {
419 	unsigned int i, hash;
420 	struct neigh_hash_table *new_nht, *old_nht;
421 
422 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
423 
424 	old_nht = rcu_dereference_protected(tbl->nht,
425 					    lockdep_is_held(&tbl->lock));
426 	new_nht = neigh_hash_alloc(new_shift);
427 	if (!new_nht)
428 		return old_nht;
429 
430 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
431 		struct neighbour *n, *next;
432 
433 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
434 						   lockdep_is_held(&tbl->lock));
435 		     n != NULL;
436 		     n = next) {
437 			hash = tbl->hash(n->primary_key, n->dev,
438 					 new_nht->hash_rnd);
439 
440 			hash >>= (32 - new_nht->hash_shift);
441 			next = rcu_dereference_protected(n->next,
442 						lockdep_is_held(&tbl->lock));
443 
444 			rcu_assign_pointer(n->next,
445 					   rcu_dereference_protected(
446 						new_nht->hash_buckets[hash],
447 						lockdep_is_held(&tbl->lock)));
448 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
449 		}
450 	}
451 
452 	rcu_assign_pointer(tbl->nht, new_nht);
453 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
454 	return new_nht;
455 }
456 
457 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
458 			       struct net_device *dev)
459 {
460 	struct neighbour *n;
461 
462 	NEIGH_CACHE_STAT_INC(tbl, lookups);
463 
464 	rcu_read_lock_bh();
465 	n = __neigh_lookup_noref(tbl, pkey, dev);
466 	if (n) {
467 		if (!refcount_inc_not_zero(&n->refcnt))
468 			n = NULL;
469 		NEIGH_CACHE_STAT_INC(tbl, hits);
470 	}
471 
472 	rcu_read_unlock_bh();
473 	return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup);
476 
477 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
478 				     const void *pkey)
479 {
480 	struct neighbour *n;
481 	unsigned int key_len = tbl->key_len;
482 	u32 hash_val;
483 	struct neigh_hash_table *nht;
484 
485 	NEIGH_CACHE_STAT_INC(tbl, lookups);
486 
487 	rcu_read_lock_bh();
488 	nht = rcu_dereference_bh(tbl->nht);
489 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
490 
491 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
492 	     n != NULL;
493 	     n = rcu_dereference_bh(n->next)) {
494 		if (!memcmp(n->primary_key, pkey, key_len) &&
495 		    net_eq(dev_net(n->dev), net)) {
496 			if (!refcount_inc_not_zero(&n->refcnt))
497 				n = NULL;
498 			NEIGH_CACHE_STAT_INC(tbl, hits);
499 			break;
500 		}
501 	}
502 
503 	rcu_read_unlock_bh();
504 	return n;
505 }
506 EXPORT_SYMBOL(neigh_lookup_nodev);
507 
508 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
509 				 struct net_device *dev, bool want_ref)
510 {
511 	u32 hash_val;
512 	unsigned int key_len = tbl->key_len;
513 	int error;
514 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
515 	struct neigh_hash_table *nht;
516 
517 	if (!n) {
518 		rc = ERR_PTR(-ENOBUFS);
519 		goto out;
520 	}
521 
522 	memcpy(n->primary_key, pkey, key_len);
523 	n->dev = dev;
524 	dev_hold(dev);
525 
526 	/* Protocol specific setup. */
527 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
528 		rc = ERR_PTR(error);
529 		goto out_neigh_release;
530 	}
531 
532 	if (dev->netdev_ops->ndo_neigh_construct) {
533 		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
534 		if (error < 0) {
535 			rc = ERR_PTR(error);
536 			goto out_neigh_release;
537 		}
538 	}
539 
540 	/* Device specific setup. */
541 	if (n->parms->neigh_setup &&
542 	    (error = n->parms->neigh_setup(n)) < 0) {
543 		rc = ERR_PTR(error);
544 		goto out_neigh_release;
545 	}
546 
547 	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
548 
549 	write_lock_bh(&tbl->lock);
550 	nht = rcu_dereference_protected(tbl->nht,
551 					lockdep_is_held(&tbl->lock));
552 
553 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
554 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
555 
556 	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
557 
558 	if (n->parms->dead) {
559 		rc = ERR_PTR(-EINVAL);
560 		goto out_tbl_unlock;
561 	}
562 
563 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
564 					    lockdep_is_held(&tbl->lock));
565 	     n1 != NULL;
566 	     n1 = rcu_dereference_protected(n1->next,
567 			lockdep_is_held(&tbl->lock))) {
568 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
569 			if (want_ref)
570 				neigh_hold(n1);
571 			rc = n1;
572 			goto out_tbl_unlock;
573 		}
574 	}
575 
576 	n->dead = 0;
577 	if (want_ref)
578 		neigh_hold(n);
579 	rcu_assign_pointer(n->next,
580 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
581 						     lockdep_is_held(&tbl->lock)));
582 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
583 	write_unlock_bh(&tbl->lock);
584 	neigh_dbg(2, "neigh %p is created\n", n);
585 	rc = n;
586 out:
587 	return rc;
588 out_tbl_unlock:
589 	write_unlock_bh(&tbl->lock);
590 out_neigh_release:
591 	neigh_release(n);
592 	goto out;
593 }
594 EXPORT_SYMBOL(__neigh_create);
595 
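/* Fold the last four bytes of the key down to a bucket index in [0, PNEIGH_HASHMASK]. */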
596 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
597 {
598 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
599 	hash_val ^= (hash_val >> 16);
600 	hash_val ^= hash_val >> 8;
601 	hash_val ^= hash_val >> 4;
602 	hash_val &= PNEIGH_HASHMASK;
603 	return hash_val;
604 }
605 
606 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
607 					      struct net *net,
608 					      const void *pkey,
609 					      unsigned int key_len,
610 					      struct net_device *dev)
611 {
612 	while (n) {
613 		if (!memcmp(n->key, pkey, key_len) &&
614 		    net_eq(pneigh_net(n), net) &&
615 		    (n->dev == dev || !n->dev))
616 			return n;
617 		n = n->next;
618 	}
619 	return NULL;
620 }
621 
622 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
623 		struct net *net, const void *pkey, struct net_device *dev)
624 {
625 	unsigned int key_len = tbl->key_len;
626 	u32 hash_val = pneigh_hash(pkey, key_len);
627 
628 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
629 				 net, pkey, key_len, dev);
630 }
631 EXPORT_SYMBOL_GPL(__pneigh_lookup);
632 
633 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
634 				    struct net *net, const void *pkey,
635 				    struct net_device *dev, int creat)
636 {
637 	struct pneigh_entry *n;
638 	unsigned int key_len = tbl->key_len;
639 	u32 hash_val = pneigh_hash(pkey, key_len);
640 
641 	read_lock_bh(&tbl->lock);
642 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
643 			      net, pkey, key_len, dev);
644 	read_unlock_bh(&tbl->lock);
645 
646 	if (n || !creat)
647 		goto out;
648 
649 	ASSERT_RTNL();
650 
651 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
652 	if (!n)
653 		goto out;
654 
655 	write_pnet(&n->net, net);
656 	memcpy(n->key, pkey, key_len);
657 	n->dev = dev;
658 	if (dev)
659 		dev_hold(dev);
660 
661 	if (tbl->pconstructor && tbl->pconstructor(n)) {
662 		if (dev)
663 			dev_put(dev);
664 		kfree(n);
665 		n = NULL;
666 		goto out;
667 	}
668 
669 	write_lock_bh(&tbl->lock);
670 	n->next = tbl->phash_buckets[hash_val];
671 	tbl->phash_buckets[hash_val] = n;
672 	write_unlock_bh(&tbl->lock);
673 out:
674 	return n;
675 }
676 EXPORT_SYMBOL(pneigh_lookup);
677 
678 
679 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
680 		  struct net_device *dev)
681 {
682 	struct pneigh_entry *n, **np;
683 	unsigned int key_len = tbl->key_len;
684 	u32 hash_val = pneigh_hash(pkey, key_len);
685 
686 	write_lock_bh(&tbl->lock);
687 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
688 	     np = &n->next) {
689 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
690 		    net_eq(pneigh_net(n), net)) {
691 			*np = n->next;
692 			write_unlock_bh(&tbl->lock);
693 			if (tbl->pdestructor)
694 				tbl->pdestructor(n);
695 			if (n->dev)
696 				dev_put(n->dev);
697 			kfree(n);
698 			return 0;
699 		}
700 	}
701 	write_unlock_bh(&tbl->lock);
702 	return -ENOENT;
703 }
704 
705 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
706 				    struct net_device *dev)
707 {
708 	struct pneigh_entry *n, **np, *freelist = NULL;
709 	u32 h;
710 
711 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
712 		np = &tbl->phash_buckets[h];
713 		while ((n = *np) != NULL) {
714 			if (!dev || n->dev == dev) {
715 				*np = n->next;
716 				n->next = freelist;
717 				freelist = n;
718 				continue;
719 			}
720 			np = &n->next;
721 		}
722 	}
723 	write_unlock_bh(&tbl->lock);
724 	while ((n = freelist)) {
725 		freelist = n->next;
726 		n->next = NULL;
727 		if (tbl->pdestructor)
728 			tbl->pdestructor(n);
729 		if (n->dev)
730 			dev_put(n->dev);
731 		kfree(n);
732 	}
733 	return -ENOENT;
734 }
735 
736 static void neigh_parms_destroy(struct neigh_parms *parms);
737 
738 static inline void neigh_parms_put(struct neigh_parms *parms)
739 {
740 	if (refcount_dec_and_test(&parms->refcnt))
741 		neigh_parms_destroy(parms);
742 }
743 
744 /*
745  *	The neighbour must already be unlinked from the table.
746  *
747  */
748 void neigh_destroy(struct neighbour *neigh)
749 {
750 	struct net_device *dev = neigh->dev;
751 
752 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
753 
754 	if (!neigh->dead) {
755 		pr_warn("Destroying alive neighbour %p\n", neigh);
756 		dump_stack();
757 		return;
758 	}
759 
760 	if (neigh_del_timer(neigh))
761 		pr_warn("Impossible event\n");
762 
763 	write_lock_bh(&neigh->lock);
764 	__skb_queue_purge(&neigh->arp_queue);
765 	write_unlock_bh(&neigh->lock);
766 	neigh->arp_queue_len_bytes = 0;
767 
768 	if (dev->netdev_ops->ndo_neigh_destroy)
769 		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
770 
771 	dev_put(dev);
772 	neigh_parms_put(neigh->parms);
773 
774 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
775 
776 	atomic_dec(&neigh->tbl->entries);
777 	kfree_rcu(neigh, rcu);
778 }
779 EXPORT_SYMBOL(neigh_destroy);
780 
781 /* Neighbour state is suspicious;
782    disable fast path.
783 
784    Called with the neigh write-locked.
785  */
786 static void neigh_suspect(struct neighbour *neigh)
787 {
788 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
789 
790 	neigh->output = neigh->ops->output;
791 }
792 
793 /* Neighbour state is OK;
794    enable fast path.
795 
796    Called with the neigh write-locked.
797  */
798 static void neigh_connect(struct neighbour *neigh)
799 {
800 	neigh_dbg(2, "neigh %p is connected\n", neigh);
801 
802 	neigh->output = neigh->ops->connected_output;
803 }
804 
805 static void neigh_periodic_work(struct work_struct *work)
806 {
807 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
808 	struct neighbour *n;
809 	struct neighbour __rcu **np;
810 	unsigned int i;
811 	struct neigh_hash_table *nht;
812 
813 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
814 
815 	write_lock_bh(&tbl->lock);
816 	nht = rcu_dereference_protected(tbl->nht,
817 					lockdep_is_held(&tbl->lock));
818 
819 	/*
820 	 *	Periodically recompute ReachableTime from the random function.
821 	 */
822 
823 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
824 		struct neigh_parms *p;
825 		tbl->last_rand = jiffies;
826 		list_for_each_entry(p, &tbl->parms_list, list)
827 			p->reachable_time =
828 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
829 	}
830 
831 	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
832 		goto out;
833 
834 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
835 		np = &nht->hash_buckets[i];
836 
837 		while ((n = rcu_dereference_protected(*np,
838 				lockdep_is_held(&tbl->lock))) != NULL) {
839 			unsigned int state;
840 
841 			write_lock(&n->lock);
842 
843 			state = n->nud_state;
844 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
845 			    (n->flags & NTF_EXT_LEARNED)) {
846 				write_unlock(&n->lock);
847 				goto next_elt;
848 			}
849 
850 			if (time_before(n->used, n->confirmed))
851 				n->used = n->confirmed;
852 
853 			if (refcount_read(&n->refcnt) == 1 &&
854 			    (state == NUD_FAILED ||
855 			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
856 				*np = n->next;
857 				n->dead = 1;
858 				write_unlock(&n->lock);
859 				neigh_cleanup_and_release(n);
860 				continue;
861 			}
862 			write_unlock(&n->lock);
863 
864 next_elt:
865 			np = &n->next;
866 		}
867 		/*
868 		 * It's fine to release lock here, even if hash table
869 		 * grows while we are preempted.
870 		 */
871 		write_unlock_bh(&tbl->lock);
872 		cond_resched();
873 		write_lock_bh(&tbl->lock);
874 		nht = rcu_dereference_protected(tbl->nht,
875 						lockdep_is_held(&tbl->lock));
876 	}
877 out:
878 	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
879 	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
880 	 * BASE_REACHABLE_TIME.
881 	 */
882 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
883 			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
884 	write_unlock_bh(&tbl->lock);
885 }
886 
887 static inline int neigh_max_probes(struct neighbour *n)
888 {
889 	struct neigh_parms *p = n->parms;
890 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
891 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
892 	        NEIGH_VAR(p, MCAST_PROBES));
893 }
894 
895 static void neigh_invalidate(struct neighbour *neigh)
896 	__releases(neigh->lock)
897 	__acquires(neigh->lock)
898 {
899 	struct sk_buff *skb;
900 
901 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
902 	neigh_dbg(2, "neigh %p is failed\n", neigh);
903 	neigh->updated = jiffies;
904 
905 	/* This is a delicate spot: the error_report() routine is very
906 	   complicated.  In particular, it can hit this same neighbour entry!
907 
908 	   So we try to be careful here and avoid a dead loop. --ANK
909 	 */
910 	while (neigh->nud_state == NUD_FAILED &&
911 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
912 		write_unlock(&neigh->lock);
913 		neigh->ops->error_report(neigh, skb);
914 		write_lock(&neigh->lock);
915 	}
916 	__skb_queue_purge(&neigh->arp_queue);
917 	neigh->arp_queue_len_bytes = 0;
918 }
919 
920 static void neigh_probe(struct neighbour *neigh)
921 	__releases(neigh->lock)
922 {
923 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
924 	/* keep skb alive even if arp_queue overflows */
925 	if (skb)
926 		skb = skb_clone(skb, GFP_ATOMIC);
927 	write_unlock(&neigh->lock);
928 	if (neigh->ops->solicit)
929 		neigh->ops->solicit(neigh, skb);
930 	atomic_inc(&neigh->probes);
931 	kfree_skb(skb);
932 }
933 
934 /* Called when a timer expires for a neighbour entry. */
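/*
 * Rough state flow handled here:
 *   NUD_REACHABLE -> NUD_DELAY or NUD_STALE once the confirmation ages out
 *   NUD_DELAY     -> NUD_REACHABLE (re-confirmed) or NUD_PROBE
 *   NUD_PROBE / NUD_INCOMPLETE -> NUD_FAILED after neigh_max_probes()
 */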
935 
936 static void neigh_timer_handler(struct timer_list *t)
937 {
938 	unsigned long now, next;
939 	struct neighbour *neigh = from_timer(neigh, t, timer);
940 	unsigned int state;
941 	int notify = 0;
942 
943 	write_lock(&neigh->lock);
944 
945 	state = neigh->nud_state;
946 	now = jiffies;
947 	next = now + HZ;
948 
949 	if (!(state & NUD_IN_TIMER))
950 		goto out;
951 
952 	if (state & NUD_REACHABLE) {
953 		if (time_before_eq(now,
954 				   neigh->confirmed + neigh->parms->reachable_time)) {
955 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
956 			next = neigh->confirmed + neigh->parms->reachable_time;
957 		} else if (time_before_eq(now,
958 					  neigh->used +
959 					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
960 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
961 			neigh->nud_state = NUD_DELAY;
962 			neigh->updated = jiffies;
963 			neigh_suspect(neigh);
964 			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
965 		} else {
966 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
967 			neigh->nud_state = NUD_STALE;
968 			neigh->updated = jiffies;
969 			neigh_suspect(neigh);
970 			notify = 1;
971 		}
972 	} else if (state & NUD_DELAY) {
973 		if (time_before_eq(now,
974 				   neigh->confirmed +
975 				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
976 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
977 			neigh->nud_state = NUD_REACHABLE;
978 			neigh->updated = jiffies;
979 			neigh_connect(neigh);
980 			notify = 1;
981 			next = neigh->confirmed + neigh->parms->reachable_time;
982 		} else {
983 			neigh_dbg(2, "neigh %p is probed\n", neigh);
984 			neigh->nud_state = NUD_PROBE;
985 			neigh->updated = jiffies;
986 			atomic_set(&neigh->probes, 0);
987 			notify = 1;
988 			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
989 		}
990 	} else {
991 		/* NUD_PROBE|NUD_INCOMPLETE */
992 		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
993 	}
994 
995 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
996 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
997 		neigh->nud_state = NUD_FAILED;
998 		notify = 1;
999 		neigh_invalidate(neigh);
1000 		goto out;
1001 	}
1002 
1003 	if (neigh->nud_state & NUD_IN_TIMER) {
1004 		if (time_before(next, jiffies + HZ/2))
1005 			next = jiffies + HZ/2;
1006 		if (!mod_timer(&neigh->timer, next))
1007 			neigh_hold(neigh);
1008 	}
1009 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1010 		neigh_probe(neigh);
1011 	} else {
1012 out:
1013 		write_unlock(&neigh->lock);
1014 	}
1015 
1016 	if (notify)
1017 		neigh_update_notify(neigh, 0);
1018 
1019 	neigh_release(neigh);
1020 }
1021 
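/*
 * Start resolution for this entry if needed.  Returns 0 when the caller
 * may transmit immediately, 1 when the skb has been queued awaiting
 * resolution (or freed on failure).
 */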
1022 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1023 {
1024 	int rc;
1025 	bool immediate_probe = false;
1026 
1027 	write_lock_bh(&neigh->lock);
1028 
1029 	rc = 0;
1030 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1031 		goto out_unlock_bh;
1032 	if (neigh->dead)
1033 		goto out_dead;
1034 
1035 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1036 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1037 		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
1038 			unsigned long next, now = jiffies;
1039 
1040 			atomic_set(&neigh->probes,
1041 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
1042 			neigh->nud_state     = NUD_INCOMPLETE;
1043 			neigh->updated = now;
1044 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1045 					 HZ/2);
1046 			neigh_add_timer(neigh, next);
1047 			immediate_probe = true;
1048 		} else {
1049 			neigh->nud_state = NUD_FAILED;
1050 			neigh->updated = jiffies;
1051 			write_unlock_bh(&neigh->lock);
1052 
1053 			kfree_skb(skb);
1054 			return 1;
1055 		}
1056 	} else if (neigh->nud_state & NUD_STALE) {
1057 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
1058 		neigh->nud_state = NUD_DELAY;
1059 		neigh->updated = jiffies;
1060 		neigh_add_timer(neigh, jiffies +
1061 				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1062 	}
1063 
1064 	if (neigh->nud_state == NUD_INCOMPLETE) {
1065 		if (skb) {
1066 			while (neigh->arp_queue_len_bytes + skb->truesize >
1067 			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1068 				struct sk_buff *buff;
1069 
1070 				buff = __skb_dequeue(&neigh->arp_queue);
1071 				if (!buff)
1072 					break;
1073 				neigh->arp_queue_len_bytes -= buff->truesize;
1074 				kfree_skb(buff);
1075 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1076 			}
1077 			skb_dst_force(skb);
1078 			__skb_queue_tail(&neigh->arp_queue, skb);
1079 			neigh->arp_queue_len_bytes += skb->truesize;
1080 		}
1081 		rc = 1;
1082 	}
1083 out_unlock_bh:
1084 	if (immediate_probe)
1085 		neigh_probe(neigh);
1086 	else
1087 		write_unlock(&neigh->lock);
1088 	local_bh_enable();
1089 	return rc;
1090 
1091 out_dead:
1092 	if (neigh->nud_state & NUD_STALE)
1093 		goto out_unlock_bh;
1094 	write_unlock_bh(&neigh->lock);
1095 	kfree_skb(skb);
1096 	return 1;
1097 }
1098 EXPORT_SYMBOL(__neigh_event_send);
1099 
1100 static void neigh_update_hhs(struct neighbour *neigh)
1101 {
1102 	struct hh_cache *hh;
1103 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1104 		= NULL;
1105 
1106 	if (neigh->dev->header_ops)
1107 		update = neigh->dev->header_ops->cache_update;
1108 
1109 	if (update) {
1110 		hh = &neigh->hh;
1111 		if (hh->hh_len) {
1112 			write_seqlock_bh(&hh->hh_lock);
1113 			update(hh, neigh->dev, neigh->ha);
1114 			write_sequnlock_bh(&hh->hh_lock);
1115 		}
1116 	}
1117 }
1118 
1119 
1120 
1121 /* Generic update routine.
1122    -- lladdr is the new lladdr, or NULL if none is supplied.
1123    -- new    is the new state.
1124    -- flags
1125 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1126 				if it is different.
1127 	NEIGH_UPDATE_F_WEAK_OVERRIDE marks an existing "connected"
1128 				lladdr as suspect instead of overriding
1129 				it if it is different.
1130 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1131 
1132 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1133 				NTF_ROUTER flag.
1134 	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
1135 				to be a router.
1136 
1137    The caller MUST hold a reference on the entry.
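
   Example (illustrative only): an ARP receive path confirming a
   binding might call
	neigh_update(n, sha, NUD_REACHABLE, NEIGH_UPDATE_F_OVERRIDE, 0);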
1138  */
1139 
1140 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1141 		 u32 flags, u32 nlmsg_pid)
1142 {
1143 	u8 old;
1144 	int err;
1145 	int notify = 0;
1146 	struct net_device *dev;
1147 	int update_isrouter = 0;
1148 
1149 	write_lock_bh(&neigh->lock);
1150 
1151 	dev    = neigh->dev;
1152 	old    = neigh->nud_state;
1153 	err    = -EPERM;
1154 
1155 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1156 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1157 		goto out;
1158 	if (neigh->dead)
1159 		goto out;
1160 
1161 	neigh_update_ext_learned(neigh, flags, &notify);
1162 
1163 	if (!(new & NUD_VALID)) {
1164 		neigh_del_timer(neigh);
1165 		if (old & NUD_CONNECTED)
1166 			neigh_suspect(neigh);
1167 		neigh->nud_state = new;
1168 		err = 0;
1169 		notify = old & NUD_VALID;
1170 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1171 		    (new & NUD_FAILED)) {
1172 			neigh_invalidate(neigh);
1173 			notify = 1;
1174 		}
1175 		goto out;
1176 	}
1177 
1178 	/* Compare new lladdr with cached one */
1179 	if (!dev->addr_len) {
1180 		/* First case: device needs no address. */
1181 		lladdr = neigh->ha;
1182 	} else if (lladdr) {
1183 		/* The second case: if something is already cached
1184 		   and a new address is proposed:
1185 		   - compare new & old
1186 		   - if they are different, check override flag
1187 		 */
1188 		if ((old & NUD_VALID) &&
1189 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1190 			lladdr = neigh->ha;
1191 	} else {
1192 		/* No address is supplied; if we know something,
1193 		   use it, otherwise discard the request.
1194 		 */
1195 		err = -EINVAL;
1196 		if (!(old & NUD_VALID))
1197 			goto out;
1198 		lladdr = neigh->ha;
1199 	}
1200 
1201 	/* Update the confirmed timestamp for the neighbour entry after we
1202 	 * receive an ARP packet, even if it doesn't change the IP-to-MAC binding.
1203 	 */
1204 	if (new & NUD_CONNECTED)
1205 		neigh->confirmed = jiffies;
1206 
1207 	/* If the entry was valid and the address has not changed,
1208 	   do not change the entry state if the new one is STALE.
1209 	 */
1210 	err = 0;
1211 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1212 	if (old & NUD_VALID) {
1213 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1214 			update_isrouter = 0;
1215 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1216 			    (old & NUD_CONNECTED)) {
1217 				lladdr = neigh->ha;
1218 				new = NUD_STALE;
1219 			} else
1220 				goto out;
1221 		} else {
1222 			if (lladdr == neigh->ha && new == NUD_STALE &&
1223 			    !(flags & NEIGH_UPDATE_F_ADMIN))
1224 				new = old;
1225 		}
1226 	}
1227 
1228 	/* Update timestamp only once we know we will make a change to the
1229 	 * neighbour entry. Otherwise we risk moving the locktime window with
1230 	 * no-op updates and ignoring relevant ARP updates.
1231 	 */
1232 	if (new != old || lladdr != neigh->ha)
1233 		neigh->updated = jiffies;
1234 
1235 	if (new != old) {
1236 		neigh_del_timer(neigh);
1237 		if (new & NUD_PROBE)
1238 			atomic_set(&neigh->probes, 0);
1239 		if (new & NUD_IN_TIMER)
1240 			neigh_add_timer(neigh, (jiffies +
1241 						((new & NUD_REACHABLE) ?
1242 						 neigh->parms->reachable_time :
1243 						 0)));
1244 		neigh->nud_state = new;
1245 		notify = 1;
1246 	}
1247 
1248 	if (lladdr != neigh->ha) {
1249 		write_seqlock(&neigh->ha_lock);
1250 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1251 		write_sequnlock(&neigh->ha_lock);
1252 		neigh_update_hhs(neigh);
1253 		if (!(new & NUD_CONNECTED))
1254 			neigh->confirmed = jiffies -
1255 				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1256 		notify = 1;
1257 	}
1258 	if (new == old)
1259 		goto out;
1260 	if (new & NUD_CONNECTED)
1261 		neigh_connect(neigh);
1262 	else
1263 		neigh_suspect(neigh);
1264 	if (!(old & NUD_VALID)) {
1265 		struct sk_buff *skb;
1266 
1267 		/* Again: avoid a dead loop if something goes wrong */
1268 
1269 		while (neigh->nud_state & NUD_VALID &&
1270 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1271 			struct dst_entry *dst = skb_dst(skb);
1272 			struct neighbour *n2, *n1 = neigh;
1273 			write_unlock_bh(&neigh->lock);
1274 
1275 			rcu_read_lock();
1276 
1277 			/* Why not just use 'neigh' as-is?  The problem is that
1278 			 * things such as shaper, eql, and sch_teql can end up
1279 			 * using alternative neigh objects to output
1280 			 * the packet in the output path.  So what we need to do
1281 			 * here is re-lookup the top-level neigh in the path so
1282 			 * we can reinject the packet there.
1283 			 */
1284 			n2 = NULL;
1285 			if (dst) {
1286 				n2 = dst_neigh_lookup_skb(dst, skb);
1287 				if (n2)
1288 					n1 = n2;
1289 			}
1290 			n1->output(n1, skb);
1291 			if (n2)
1292 				neigh_release(n2);
1293 			rcu_read_unlock();
1294 
1295 			write_lock_bh(&neigh->lock);
1296 		}
1297 		__skb_queue_purge(&neigh->arp_queue);
1298 		neigh->arp_queue_len_bytes = 0;
1299 	}
1300 out:
1301 	if (update_isrouter)
1302 		neigh_update_is_router(neigh, flags, &notify);
1303 	write_unlock_bh(&neigh->lock);
1304 
1305 	if (notify)
1306 		neigh_update_notify(neigh, nlmsg_pid);
1307 
1308 	return err;
1309 }
1310 EXPORT_SYMBOL(neigh_update);
1311 
1312 /* Update the neigh to listen temporarily for probe responses, even if it is
1313  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1314  */
1315 void __neigh_set_probe_once(struct neighbour *neigh)
1316 {
1317 	if (neigh->dead)
1318 		return;
1319 	neigh->updated = jiffies;
1320 	if (!(neigh->nud_state & NUD_FAILED))
1321 		return;
1322 	neigh->nud_state = NUD_INCOMPLETE;
1323 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1324 	neigh_add_timer(neigh,
1325 			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1326 }
1327 EXPORT_SYMBOL(__neigh_set_probe_once);
1328 
1329 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1330 				 u8 *lladdr, void *saddr,
1331 				 struct net_device *dev)
1332 {
1333 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1334 						 lladdr || !dev->addr_len);
1335 	if (neigh)
1336 		neigh_update(neigh, lladdr, NUD_STALE,
1337 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1338 	return neigh;
1339 }
1340 EXPORT_SYMBOL(neigh_event_ns);
1341 
1342 /* Initializes the cached hardware header; takes n->lock for writing. */
1343 static void neigh_hh_init(struct neighbour *n)
1344 {
1345 	struct net_device *dev = n->dev;
1346 	__be16 prot = n->tbl->protocol;
1347 	struct hh_cache	*hh = &n->hh;
1348 
1349 	write_lock_bh(&n->lock);
1350 
1351 	/* Only one thread can come in here and initialize the
1352 	 * hh_cache entry.
1353 	 */
1354 	if (!hh->hh_len)
1355 		dev->header_ops->cache(n, hh, prot);
1356 
1357 	write_unlock_bh(&n->lock);
1358 }
1359 
1360 /* Slow and careful. */
1361 
1362 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1363 {
1364 	int rc = 0;
1365 
1366 	if (!neigh_event_send(neigh, skb)) {
1367 		int err;
1368 		struct net_device *dev = neigh->dev;
1369 		unsigned int seq;
1370 
1371 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1372 			neigh_hh_init(neigh);
1373 
1374 		do {
1375 			__skb_pull(skb, skb_network_offset(skb));
1376 			seq = read_seqbegin(&neigh->ha_lock);
1377 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1378 					      neigh->ha, NULL, skb->len);
1379 		} while (read_seqretry(&neigh->ha_lock, seq));
1380 
1381 		if (err >= 0)
1382 			rc = dev_queue_xmit(skb);
1383 		else
1384 			goto out_kfree_skb;
1385 	}
1386 out:
1387 	return rc;
1388 out_kfree_skb:
1389 	rc = -EINVAL;
1390 	kfree_skb(skb);
1391 	goto out;
1392 }
1393 EXPORT_SYMBOL(neigh_resolve_output);
1394 
1395 /* As fast as possible without hh cache */
1396 
1397 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1398 {
1399 	struct net_device *dev = neigh->dev;
1400 	unsigned int seq;
1401 	int err;
1402 
1403 	do {
1404 		__skb_pull(skb, skb_network_offset(skb));
1405 		seq = read_seqbegin(&neigh->ha_lock);
1406 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1407 				      neigh->ha, NULL, skb->len);
1408 	} while (read_seqretry(&neigh->ha_lock, seq));
1409 
1410 	if (err >= 0)
1411 		err = dev_queue_xmit(skb);
1412 	else {
1413 		err = -EINVAL;
1414 		kfree_skb(skb);
1415 	}
1416 	return err;
1417 }
1418 EXPORT_SYMBOL(neigh_connected_output);
1419 
1420 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1421 {
1422 	return dev_queue_xmit(skb);
1423 }
1424 EXPORT_SYMBOL(neigh_direct_output);
1425 
1426 static void neigh_proxy_process(struct timer_list *t)
1427 {
1428 	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1429 	long sched_next = 0;
1430 	unsigned long now = jiffies;
1431 	struct sk_buff *skb, *n;
1432 
1433 	spin_lock(&tbl->proxy_queue.lock);
1434 
1435 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1436 		long tdif = NEIGH_CB(skb)->sched_next - now;
1437 
1438 		if (tdif <= 0) {
1439 			struct net_device *dev = skb->dev;
1440 
1441 			__skb_unlink(skb, &tbl->proxy_queue);
1442 			if (tbl->proxy_redo && netif_running(dev)) {
1443 				rcu_read_lock();
1444 				tbl->proxy_redo(skb);
1445 				rcu_read_unlock();
1446 			} else {
1447 				kfree_skb(skb);
1448 			}
1449 
1450 			dev_put(dev);
1451 		} else if (!sched_next || tdif < sched_next)
1452 			sched_next = tdif;
1453 	}
1454 	del_timer(&tbl->proxy_timer);
1455 	if (sched_next)
1456 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1457 	spin_unlock(&tbl->proxy_queue.lock);
1458 }
1459 
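/*
 * Queue a proxied request to be answered after a random delay of at most
 * PROXY_DELAY, rate-limiting proxy replies.
 */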
1460 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1461 		    struct sk_buff *skb)
1462 {
1463 	unsigned long now = jiffies;
1464 
1465 	unsigned long sched_next = now + (prandom_u32() %
1466 					  NEIGH_VAR(p, PROXY_DELAY));
1467 
1468 	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1469 		kfree_skb(skb);
1470 		return;
1471 	}
1472 
1473 	NEIGH_CB(skb)->sched_next = sched_next;
1474 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1475 
1476 	spin_lock(&tbl->proxy_queue.lock);
1477 	if (del_timer(&tbl->proxy_timer)) {
1478 		if (time_before(tbl->proxy_timer.expires, sched_next))
1479 			sched_next = tbl->proxy_timer.expires;
1480 	}
1481 	skb_dst_drop(skb);
1482 	dev_hold(skb->dev);
1483 	__skb_queue_tail(&tbl->proxy_queue, skb);
1484 	mod_timer(&tbl->proxy_timer, sched_next);
1485 	spin_unlock(&tbl->proxy_queue.lock);
1486 }
1487 EXPORT_SYMBOL(pneigh_enqueue);
1488 
1489 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1490 						      struct net *net, int ifindex)
1491 {
1492 	struct neigh_parms *p;
1493 
1494 	list_for_each_entry(p, &tbl->parms_list, list) {
1495 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1496 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1497 			return p;
1498 	}
1499 
1500 	return NULL;
1501 }
1502 
1503 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1504 				      struct neigh_table *tbl)
1505 {
1506 	struct neigh_parms *p;
1507 	struct net *net = dev_net(dev);
1508 	const struct net_device_ops *ops = dev->netdev_ops;
1509 
1510 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1511 	if (p) {
1512 		p->tbl		  = tbl;
1513 		refcount_set(&p->refcnt, 1);
1514 		p->reachable_time =
1515 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1516 		dev_hold(dev);
1517 		p->dev = dev;
1518 		write_pnet(&p->net, net);
1519 		p->sysctl_table = NULL;
1520 
1521 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1522 			dev_put(dev);
1523 			kfree(p);
1524 			return NULL;
1525 		}
1526 
1527 		write_lock_bh(&tbl->lock);
1528 		list_add(&p->list, &tbl->parms.list);
1529 		write_unlock_bh(&tbl->lock);
1530 
1531 		neigh_parms_data_state_cleanall(p);
1532 	}
1533 	return p;
1534 }
1535 EXPORT_SYMBOL(neigh_parms_alloc);
1536 
1537 static void neigh_rcu_free_parms(struct rcu_head *head)
1538 {
1539 	struct neigh_parms *parms =
1540 		container_of(head, struct neigh_parms, rcu_head);
1541 
1542 	neigh_parms_put(parms);
1543 }
1544 
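/*
 * Unlink the parms from the table; the final put happens from an RCU
 * callback so that lockless readers never see freed parms.
 */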
1545 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1546 {
1547 	if (!parms || parms == &tbl->parms)
1548 		return;
1549 	write_lock_bh(&tbl->lock);
1550 	list_del(&parms->list);
1551 	parms->dead = 1;
1552 	write_unlock_bh(&tbl->lock);
1553 	if (parms->dev)
1554 		dev_put(parms->dev);
1555 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1556 }
1557 EXPORT_SYMBOL(neigh_parms_release);
1558 
1559 static void neigh_parms_destroy(struct neigh_parms *parms)
1560 {
1561 	kfree(parms);
1562 }
1563 
1564 static struct lock_class_key neigh_table_proxy_queue_class;
1565 
1566 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1567 
1568 void neigh_table_init(int index, struct neigh_table *tbl)
1569 {
1570 	unsigned long now = jiffies;
1571 	unsigned long phsize;
1572 
1573 	INIT_LIST_HEAD(&tbl->parms_list);
1574 	list_add(&tbl->parms.list, &tbl->parms_list);
1575 	write_pnet(&tbl->parms.net, &init_net);
1576 	refcount_set(&tbl->parms.refcnt, 1);
1577 	tbl->parms.reachable_time =
1578 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1579 
1580 	tbl->stats = alloc_percpu(struct neigh_statistics);
1581 	if (!tbl->stats)
1582 		panic("cannot create neighbour cache statistics");
1583 
1584 #ifdef CONFIG_PROC_FS
1585 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1586 			      &neigh_stat_seq_ops, tbl))
1587 		panic("cannot create neighbour proc dir entry");
1588 #endif
1589 
1590 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1591 
1592 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1593 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1594 
1595 	if (!tbl->nht || !tbl->phash_buckets)
1596 		panic("cannot allocate neighbour cache hashes");
1597 
1598 	if (!tbl->entry_size)
1599 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1600 					tbl->key_len, NEIGH_PRIV_ALIGN);
1601 	else
1602 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1603 
1604 	rwlock_init(&tbl->lock);
1605 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1606 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1607 			tbl->parms.reachable_time);
1608 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1609 	skb_queue_head_init_class(&tbl->proxy_queue,
1610 			&neigh_table_proxy_queue_class);
1611 
1612 	tbl->last_flush = now;
1613 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1614 
1615 	neigh_tables[index] = tbl;
1616 }
1617 EXPORT_SYMBOL(neigh_table_init);
1618 
1619 int neigh_table_clear(int index, struct neigh_table *tbl)
1620 {
1621 	neigh_tables[index] = NULL;
1622 	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
1623 	cancel_delayed_work_sync(&tbl->gc_work);
1624 	del_timer_sync(&tbl->proxy_timer);
1625 	pneigh_queue_purge(&tbl->proxy_queue);
1626 	neigh_ifdown(tbl, NULL);
1627 	if (atomic_read(&tbl->entries))
1628 		pr_crit("neighbour leakage\n");
1629 
1630 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1631 		 neigh_hash_free_rcu);
1632 	tbl->nht = NULL;
1633 
1634 	kfree(tbl->phash_buckets);
1635 	tbl->phash_buckets = NULL;
1636 
1637 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1638 
1639 	free_percpu(tbl->stats);
1640 	tbl->stats = NULL;
1641 
1642 	return 0;
1643 }
1644 EXPORT_SYMBOL(neigh_table_clear);
1645 
1646 static struct neigh_table *neigh_find_table(int family)
1647 {
1648 	struct neigh_table *tbl = NULL;
1649 
1650 	switch (family) {
1651 	case AF_INET:
1652 		tbl = neigh_tables[NEIGH_ARP_TABLE];
1653 		break;
1654 	case AF_INET6:
1655 		tbl = neigh_tables[NEIGH_ND_TABLE];
1656 		break;
1657 	case AF_DECnet:
1658 		tbl = neigh_tables[NEIGH_DN_TABLE];
1659 		break;
1660 	}
1661 
1662 	return tbl;
1663 }
1664 
1665 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1666 			struct netlink_ext_ack *extack)
1667 {
1668 	struct net *net = sock_net(skb->sk);
1669 	struct ndmsg *ndm;
1670 	struct nlattr *dst_attr;
1671 	struct neigh_table *tbl;
1672 	struct neighbour *neigh;
1673 	struct net_device *dev = NULL;
1674 	int err = -EINVAL;
1675 
1676 	ASSERT_RTNL();
1677 	if (nlmsg_len(nlh) < sizeof(*ndm))
1678 		goto out;
1679 
1680 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1681 	if (dst_attr == NULL)
1682 		goto out;
1683 
1684 	ndm = nlmsg_data(nlh);
1685 	if (ndm->ndm_ifindex) {
1686 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1687 		if (dev == NULL) {
1688 			err = -ENODEV;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	tbl = neigh_find_table(ndm->ndm_family);
1694 	if (tbl == NULL)
1695 		return -EAFNOSUPPORT;
1696 
1697 	if (nla_len(dst_attr) < (int)tbl->key_len)
1698 		goto out;
1699 
1700 	if (ndm->ndm_flags & NTF_PROXY) {
1701 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1702 		goto out;
1703 	}
1704 
1705 	if (dev == NULL)
1706 		goto out;
1707 
1708 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1709 	if (neigh == NULL) {
1710 		err = -ENOENT;
1711 		goto out;
1712 	}
1713 
1714 	err = neigh_update(neigh, NULL, NUD_FAILED,
1715 			   NEIGH_UPDATE_F_OVERRIDE |
1716 			   NEIGH_UPDATE_F_ADMIN,
1717 			   NETLINK_CB(skb).portid);
1718 	write_lock_bh(&tbl->lock);
1719 	neigh_release(neigh);
1720 	neigh_remove_one(neigh, tbl);
1721 	write_unlock_bh(&tbl->lock);
1722 
1723 out:
1724 	return err;
1725 }
1726 
1727 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1728 		     struct netlink_ext_ack *extack)
1729 {
1730 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1731 		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1732 	struct net *net = sock_net(skb->sk);
1733 	struct ndmsg *ndm;
1734 	struct nlattr *tb[NDA_MAX+1];
1735 	struct neigh_table *tbl;
1736 	struct net_device *dev = NULL;
1737 	struct neighbour *neigh;
1738 	void *dst, *lladdr;
1739 	int err;
1740 
1741 	ASSERT_RTNL();
1742 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1743 	if (err < 0)
1744 		goto out;
1745 
1746 	err = -EINVAL;
1747 	if (tb[NDA_DST] == NULL)
1748 		goto out;
1749 
1750 	ndm = nlmsg_data(nlh);
1751 	if (ndm->ndm_ifindex) {
1752 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1753 		if (dev == NULL) {
1754 			err = -ENODEV;
1755 			goto out;
1756 		}
1757 
1758 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1759 			goto out;
1760 	}
1761 
1762 	tbl = neigh_find_table(ndm->ndm_family);
1763 	if (tbl == NULL)
1764 		return -EAFNOSUPPORT;
1765 
1766 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
1767 		goto out;
1768 	dst = nla_data(tb[NDA_DST]);
1769 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1770 
1771 	if (ndm->ndm_flags & NTF_PROXY) {
1772 		struct pneigh_entry *pn;
1773 
1774 		err = -ENOBUFS;
1775 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
1776 		if (pn) {
1777 			pn->flags = ndm->ndm_flags;
1778 			err = 0;
1779 		}
1780 		goto out;
1781 	}
1782 
1783 	if (dev == NULL)
1784 		goto out;
1785 
1786 	neigh = neigh_lookup(tbl, dst, dev);
1787 	if (neigh == NULL) {
1788 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1789 			err = -ENOENT;
1790 			goto out;
1791 		}
1792 
1793 		neigh = __neigh_lookup_errno(tbl, dst, dev);
1794 		if (IS_ERR(neigh)) {
1795 			err = PTR_ERR(neigh);
1796 			goto out;
1797 		}
1798 	} else {
1799 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
1800 			err = -EEXIST;
1801 			neigh_release(neigh);
1802 			goto out;
1803 		}
1804 
1805 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1806 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1807 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1808 	}
1809 
1810 	if (ndm->ndm_flags & NTF_EXT_LEARNED)
1811 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1812 
1813 	if (ndm->ndm_flags & NTF_ROUTER)
1814 		flags |= NEIGH_UPDATE_F_ISROUTER;
1815 
1816 	if (ndm->ndm_flags & NTF_USE) {
1817 		neigh_event_send(neigh, NULL);
1818 		err = 0;
1819 	} else
1820 		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1821 				   NETLINK_CB(skb).portid);
1822 	neigh_release(neigh);
1823 
1824 out:
1825 	return err;
1826 }
1827 
1828 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1829 {
1830 	struct nlattr *nest;
1831 
1832 	nest = nla_nest_start(skb, NDTA_PARMS);
1833 	if (nest == NULL)
1834 		return -ENOBUFS;
1835 
1836 	if ((parms->dev &&
1837 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1838 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1839 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1840 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1841 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
1842 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1843 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1844 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1845 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1846 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
1847 			NEIGH_VAR(parms, UCAST_PROBES)) ||
1848 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
1849 			NEIGH_VAR(parms, MCAST_PROBES)) ||
1850 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1851 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
1852 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1853 			  NDTPA_PAD) ||
1854 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1855 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1856 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
1857 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1858 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1859 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1860 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1861 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1862 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1863 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1864 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1865 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1866 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
1867 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1868 		goto nla_put_failure;
1869 	return nla_nest_end(skb, nest);
1870 
1871 nla_put_failure:
1872 	nla_nest_cancel(skb, nest);
1873 	return -EMSGSIZE;
1874 }
1875 
1876 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1877 			      u32 pid, u32 seq, int type, int flags)
1878 {
1879 	struct nlmsghdr *nlh;
1880 	struct ndtmsg *ndtmsg;
1881 
1882 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1883 	if (nlh == NULL)
1884 		return -EMSGSIZE;
1885 
1886 	ndtmsg = nlmsg_data(nlh);
1887 
1888 	read_lock_bh(&tbl->lock);
1889 	ndtmsg->ndtm_family = tbl->family;
1890 	ndtmsg->ndtm_pad1   = 0;
1891 	ndtmsg->ndtm_pad2   = 0;
1892 
1893 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1894 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
1895 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1896 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1897 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1898 		goto nla_put_failure;
1899 	{
1900 		unsigned long now = jiffies;
1901 		unsigned int flush_delta = now - tbl->last_flush;
1902 		unsigned int rand_delta = now - tbl->last_rand;
1903 		struct neigh_hash_table *nht;
1904 		struct ndt_config ndc = {
1905 			.ndtc_key_len		= tbl->key_len,
1906 			.ndtc_entry_size	= tbl->entry_size,
1907 			.ndtc_entries		= atomic_read(&tbl->entries),
1908 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1909 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1910 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1911 		};
1912 
1913 		rcu_read_lock_bh();
1914 		nht = rcu_dereference_bh(tbl->nht);
1915 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1916 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1917 		rcu_read_unlock_bh();
1918 
1919 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1920 			goto nla_put_failure;
1921 	}
1922 
1923 	{
1924 		int cpu;
1925 		struct ndt_stats ndst;
1926 
1927 		memset(&ndst, 0, sizeof(ndst));
1928 
1929 		for_each_possible_cpu(cpu) {
1930 			struct neigh_statistics	*st;
1931 
1932 			st = per_cpu_ptr(tbl->stats, cpu);
1933 			ndst.ndts_allocs		+= st->allocs;
1934 			ndst.ndts_destroys		+= st->destroys;
1935 			ndst.ndts_hash_grows		+= st->hash_grows;
1936 			ndst.ndts_res_failed		+= st->res_failed;
1937 			ndst.ndts_lookups		+= st->lookups;
1938 			ndst.ndts_hits			+= st->hits;
1939 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1940 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1941 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1942 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1943 			ndst.ndts_table_fulls		+= st->table_fulls;
1944 		}
1945 
1946 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1947 				  NDTA_PAD))
1948 			goto nla_put_failure;
1949 	}
1950 
1951 	BUG_ON(tbl->parms.dev);
1952 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1953 		goto nla_put_failure;
1954 
1955 	read_unlock_bh(&tbl->lock);
1956 	nlmsg_end(skb, nlh);
1957 	return 0;
1958 
1959 nla_put_failure:
1960 	read_unlock_bh(&tbl->lock);
1961 	nlmsg_cancel(skb, nlh);
1962 	return -EMSGSIZE;
1963 }
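
/*
 * Note (added commentary): besides the per-table tunables, each table dump
 * carries an NDTA_CONFIG snapshot (key/entry sizes, hash geometry, queue
 * length) taken under RCU, plus an NDTA_STATS blob summed over all
 * possible CPUs, so userspace sees a single aggregate counter set per
 * table rather than per-CPU values.
 */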
1964 
1965 static int neightbl_fill_param_info(struct sk_buff *skb,
1966 				    struct neigh_table *tbl,
1967 				    struct neigh_parms *parms,
1968 				    u32 pid, u32 seq, int type,
1969 				    unsigned int flags)
1970 {
1971 	struct ndtmsg *ndtmsg;
1972 	struct nlmsghdr *nlh;
1973 
1974 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1975 	if (nlh == NULL)
1976 		return -EMSGSIZE;
1977 
1978 	ndtmsg = nlmsg_data(nlh);
1979 
1980 	read_lock_bh(&tbl->lock);
1981 	ndtmsg->ndtm_family = tbl->family;
1982 	ndtmsg->ndtm_pad1   = 0;
1983 	ndtmsg->ndtm_pad2   = 0;
1984 
1985 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1986 	    neightbl_fill_parms(skb, parms) < 0)
1987 		goto errout;
1988 
1989 	read_unlock_bh(&tbl->lock);
1990 	nlmsg_end(skb, nlh);
1991 	return 0;
1992 errout:
1993 	read_unlock_bh(&tbl->lock);
1994 	nlmsg_cancel(skb, nlh);
1995 	return -EMSGSIZE;
1996 }
1997 
1998 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1999 	[NDTA_NAME]		= { .type = NLA_STRING },
2000 	[NDTA_THRESH1]		= { .type = NLA_U32 },
2001 	[NDTA_THRESH2]		= { .type = NLA_U32 },
2002 	[NDTA_THRESH3]		= { .type = NLA_U32 },
2003 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
2004 	[NDTA_PARMS]		= { .type = NLA_NESTED },
2005 };
2006 
2007 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2008 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
2009 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
2010 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
2011 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
2012 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
2013 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
2014 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
2015 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
2016 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
2017 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
2018 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
2019 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
2020 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2021 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2022 };
2023 
2024 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2025 			struct netlink_ext_ack *extack)
2026 {
2027 	struct net *net = sock_net(skb->sk);
2028 	struct neigh_table *tbl;
2029 	struct ndtmsg *ndtmsg;
2030 	struct nlattr *tb[NDTA_MAX+1];
2031 	bool found = false;
2032 	int err, tidx;
2033 
2034 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2035 			  nl_neightbl_policy, extack);
2036 	if (err < 0)
2037 		goto errout;
2038 
2039 	if (tb[NDTA_NAME] == NULL) {
2040 		err = -EINVAL;
2041 		goto errout;
2042 	}
2043 
2044 	ndtmsg = nlmsg_data(nlh);
2045 
2046 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2047 		tbl = neigh_tables[tidx];
2048 		if (!tbl)
2049 			continue;
2050 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2051 			continue;
2052 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2053 			found = true;
2054 			break;
2055 		}
2056 	}
2057 
2058 	if (!found)
2059 		return -ENOENT;
2060 
2061 	/*
2062 	 * We acquire tbl->lock to be nice to the periodic timers and
2063 	 * make sure they always see a consistent set of values.
2064 	 */
2065 	write_lock_bh(&tbl->lock);
2066 
2067 	if (tb[NDTA_PARMS]) {
2068 		struct nlattr *tbp[NDTPA_MAX+1];
2069 		struct neigh_parms *p;
2070 		int i, ifindex = 0;
2071 
2072 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2073 				       nl_ntbl_parm_policy, extack);
2074 		if (err < 0)
2075 			goto errout_tbl_lock;
2076 
2077 		if (tbp[NDTPA_IFINDEX])
2078 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2079 
2080 		p = lookup_neigh_parms(tbl, net, ifindex);
2081 		if (p == NULL) {
2082 			err = -ENOENT;
2083 			goto errout_tbl_lock;
2084 		}
2085 
2086 		for (i = 1; i <= NDTPA_MAX; i++) {
2087 			if (tbp[i] == NULL)
2088 				continue;
2089 
2090 			switch (i) {
2091 			case NDTPA_QUEUE_LEN:
2092 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2093 					      nla_get_u32(tbp[i]) *
2094 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2095 				break;
2096 			case NDTPA_QUEUE_LENBYTES:
2097 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2098 					      nla_get_u32(tbp[i]));
2099 				break;
2100 			case NDTPA_PROXY_QLEN:
2101 				NEIGH_VAR_SET(p, PROXY_QLEN,
2102 					      nla_get_u32(tbp[i]));
2103 				break;
2104 			case NDTPA_APP_PROBES:
2105 				NEIGH_VAR_SET(p, APP_PROBES,
2106 					      nla_get_u32(tbp[i]));
2107 				break;
2108 			case NDTPA_UCAST_PROBES:
2109 				NEIGH_VAR_SET(p, UCAST_PROBES,
2110 					      nla_get_u32(tbp[i]));
2111 				break;
2112 			case NDTPA_MCAST_PROBES:
2113 				NEIGH_VAR_SET(p, MCAST_PROBES,
2114 					      nla_get_u32(tbp[i]));
2115 				break;
2116 			case NDTPA_MCAST_REPROBES:
2117 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2118 					      nla_get_u32(tbp[i]));
2119 				break;
2120 			case NDTPA_BASE_REACHABLE_TIME:
2121 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2122 					      nla_get_msecs(tbp[i]));
2123 				/* Update reachable_time as well; otherwise the
2124 				 * change only takes effect after neigh_periodic_work
2125 				 * next recomputes it (which can take several minutes).
2126 				 */
2127 				p->reachable_time =
2128 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2129 				break;
2130 			case NDTPA_GC_STALETIME:
2131 				NEIGH_VAR_SET(p, GC_STALETIME,
2132 					      nla_get_msecs(tbp[i]));
2133 				break;
2134 			case NDTPA_DELAY_PROBE_TIME:
2135 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2136 					      nla_get_msecs(tbp[i]));
2137 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2138 				break;
2139 			case NDTPA_RETRANS_TIME:
2140 				NEIGH_VAR_SET(p, RETRANS_TIME,
2141 					      nla_get_msecs(tbp[i]));
2142 				break;
2143 			case NDTPA_ANYCAST_DELAY:
2144 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2145 					      nla_get_msecs(tbp[i]));
2146 				break;
2147 			case NDTPA_PROXY_DELAY:
2148 				NEIGH_VAR_SET(p, PROXY_DELAY,
2149 					      nla_get_msecs(tbp[i]));
2150 				break;
2151 			case NDTPA_LOCKTIME:
2152 				NEIGH_VAR_SET(p, LOCKTIME,
2153 					      nla_get_msecs(tbp[i]));
2154 				break;
2155 			}
2156 		}
2157 	}
2158 
2159 	err = -ENOENT;
2160 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2161 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2162 	    !net_eq(net, &init_net))
2163 		goto errout_tbl_lock;
2164 
2165 	if (tb[NDTA_THRESH1])
2166 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2167 
2168 	if (tb[NDTA_THRESH2])
2169 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2170 
2171 	if (tb[NDTA_THRESH3])
2172 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2173 
2174 	if (tb[NDTA_GC_INTERVAL])
2175 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2176 
2177 	err = 0;
2178 
2179 errout_tbl_lock:
2180 	write_unlock_bh(&tbl->lock);
2181 errout:
2182 	return err;
2183 }
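
/*
 * Illustrative only (a sketch, not part of the original file): a minimal
 * RTM_SETNEIGHTBL request handled above consists of a struct ndtmsg whose
 * ndtm_family selects the table (0 matches any family), an NDTA_NAME
 * string such as "arp_cache", and optionally a nested NDTA_PARMS blob of
 * per-device tunables, e.g.:
 *
 *	struct ndtmsg	{ .ndtm_family = AF_INET }
 *	NDTA_NAME	"arp_cache"
 *	NDTA_PARMS
 *		NDTPA_IFINDEX			2
 *		NDTPA_BASE_REACHABLE_TIME	30000 (milliseconds)
 *
 * The gc_thresh1..3 and gc_interval attributes are table-global, which is
 * why they are refused outside init_net.
 */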
2184 
2185 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2186 				    struct netlink_ext_ack *extack)
2187 {
2188 	struct ndtmsg *ndtm;
2189 
2190 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2191 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2192 		return -EINVAL;
2193 	}
2194 
2195 	ndtm = nlmsg_data(nlh);
2196 	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2197 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2198 		return -EINVAL;
2199 	}
2200 
2201 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2202 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2203 		return -EINVAL;
2204 	}
2205 
2206 	return 0;
2207 }
2208 
2209 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2210 {
2211 	const struct nlmsghdr *nlh = cb->nlh;
2212 	struct net *net = sock_net(skb->sk);
2213 	int family, tidx, nidx = 0;
2214 	int tbl_skip = cb->args[0];
2215 	int neigh_skip = cb->args[1];
2216 	struct neigh_table *tbl;
2217 
2218 	if (cb->strict_check) {
2219 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2220 
2221 		if (err < 0)
2222 			return err;
2223 	}
2224 
2225 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2226 
2227 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2228 		struct neigh_parms *p;
2229 
2230 		tbl = neigh_tables[tidx];
2231 		if (!tbl)
2232 			continue;
2233 
2234 		if (tidx < tbl_skip || (family && tbl->family != family))
2235 			continue;
2236 
2237 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2238 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2239 				       NLM_F_MULTI) < 0)
2240 			break;
2241 
2242 		nidx = 0;
2243 		p = list_next_entry(&tbl->parms, list);
2244 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2245 			if (!net_eq(neigh_parms_net(p), net))
2246 				continue;
2247 
2248 			if (nidx < neigh_skip)
2249 				goto next;
2250 
2251 			if (neightbl_fill_param_info(skb, tbl, p,
2252 						     NETLINK_CB(cb->skb).portid,
2253 						     nlh->nlmsg_seq,
2254 						     RTM_NEWNEIGHTBL,
2255 						     NLM_F_MULTI) < 0)
2256 				goto out;
2257 		next:
2258 			nidx++;
2259 		}
2260 
2261 		neigh_skip = 0;
2262 	}
2263 out:
2264 	cb->args[0] = tidx;
2265 	cb->args[1] = nidx;
2266 
2267 	return skb->len;
2268 }
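
/*
 * Note (added commentary): the dump is resumable.  cb->args[0] records the
 * neigh_tables[] index and cb->args[1] the position in that table's
 * parms_list, so when the skb fills up the next netlink_dump() pass
 * continues from where this one stopped.
 */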
2269 
2270 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2271 			   u32 pid, u32 seq, int type, unsigned int flags)
2272 {
2273 	unsigned long now = jiffies;
2274 	struct nda_cacheinfo ci;
2275 	struct nlmsghdr *nlh;
2276 	struct ndmsg *ndm;
2277 
2278 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2279 	if (nlh == NULL)
2280 		return -EMSGSIZE;
2281 
2282 	ndm = nlmsg_data(nlh);
2283 	ndm->ndm_family	 = neigh->ops->family;
2284 	ndm->ndm_pad1    = 0;
2285 	ndm->ndm_pad2    = 0;
2286 	ndm->ndm_flags	 = neigh->flags;
2287 	ndm->ndm_type	 = neigh->type;
2288 	ndm->ndm_ifindex = neigh->dev->ifindex;
2289 
2290 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2291 		goto nla_put_failure;
2292 
2293 	read_lock_bh(&neigh->lock);
2294 	ndm->ndm_state	 = neigh->nud_state;
2295 	if (neigh->nud_state & NUD_VALID) {
2296 		char haddr[MAX_ADDR_LEN];
2297 
2298 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2299 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2300 			read_unlock_bh(&neigh->lock);
2301 			goto nla_put_failure;
2302 		}
2303 	}
2304 
2305 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2306 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2307 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2308 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2309 	read_unlock_bh(&neigh->lock);
2310 
2311 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2312 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2313 		goto nla_put_failure;
2314 
2315 	nlmsg_end(skb, nlh);
2316 	return 0;
2317 
2318 nla_put_failure:
2319 	nlmsg_cancel(skb, nlh);
2320 	return -EMSGSIZE;
2321 }
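
/*
 * Illustrative only: an RTM_NEWNEIGH message built here for an IPv4 entry
 * might decode as
 *
 *	struct ndmsg	{ .ndm_family = AF_INET, .ndm_state = NUD_REACHABLE }
 *	NDA_DST		192.0.2.1
 *	NDA_LLADDR	00:11:22:33:44:55
 *	NDA_PROBES	0
 *	NDA_CACHEINFO	{ used, confirmed, updated (clock_t), refcnt }
 *
 * NDA_LLADDR is included only while the entry is in a NUD_VALID state.
 */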
2322 
2323 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2324 			    u32 pid, u32 seq, int type, unsigned int flags,
2325 			    struct neigh_table *tbl)
2326 {
2327 	struct nlmsghdr *nlh;
2328 	struct ndmsg *ndm;
2329 
2330 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2331 	if (nlh == NULL)
2332 		return -EMSGSIZE;
2333 
2334 	ndm = nlmsg_data(nlh);
2335 	ndm->ndm_family	 = tbl->family;
2336 	ndm->ndm_pad1    = 0;
2337 	ndm->ndm_pad2    = 0;
2338 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2339 	ndm->ndm_type	 = RTN_UNICAST;
2340 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2341 	ndm->ndm_state	 = NUD_NONE;
2342 
2343 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2344 		goto nla_put_failure;
2345 
2346 	nlmsg_end(skb, nlh);
2347 	return 0;
2348 
2349 nla_put_failure:
2350 	nlmsg_cancel(skb, nlh);
2351 	return -EMSGSIZE;
2352 }
2353 
2354 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2355 {
2356 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2357 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2358 }
2359 
2360 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2361 {
2362 	struct net_device *master;
2363 
2364 	if (!master_idx)
2365 		return false;
2366 
2367 	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2368 	if (!master || master->ifindex != master_idx)
2369 		return true;
2370 
2371 	return false;
2372 }
2373 
2374 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2375 {
2376 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2377 		return true;
2378 
2379 	return false;
2380 }
2381 
2382 struct neigh_dump_filter {
2383 	int master_idx;
2384 	int dev_idx;
2385 };
2386 
2387 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2388 			    struct netlink_callback *cb,
2389 			    struct neigh_dump_filter *filter)
2390 {
2391 	struct net *net = sock_net(skb->sk);
2392 	struct neighbour *n;
2393 	int rc, h, s_h = cb->args[1];
2394 	int idx, s_idx = idx = cb->args[2];
2395 	struct neigh_hash_table *nht;
2396 	unsigned int flags = NLM_F_MULTI;
2397 
2398 	if (filter->dev_idx || filter->master_idx)
2399 		flags |= NLM_F_DUMP_FILTERED;
2400 
2401 	rcu_read_lock_bh();
2402 	nht = rcu_dereference_bh(tbl->nht);
2403 
2404 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2405 		if (h > s_h)
2406 			s_idx = 0;
2407 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2408 		     n != NULL;
2409 		     n = rcu_dereference_bh(n->next)) {
2410 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2411 				goto next;
2412 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2413 			    neigh_master_filtered(n->dev, filter->master_idx))
2414 				goto next;
2415 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2416 					    cb->nlh->nlmsg_seq,
2417 					    RTM_NEWNEIGH,
2418 					    flags) < 0) {
2419 				rc = -1;
2420 				goto out;
2421 			}
2422 next:
2423 			idx++;
2424 		}
2425 	}
2426 	rc = skb->len;
2427 out:
2428 	rcu_read_unlock_bh();
2429 	cb->args[1] = h;
2430 	cb->args[2] = idx;
2431 	return rc;
2432 }
2433 
2434 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2435 			     struct netlink_callback *cb,
2436 			     struct neigh_dump_filter *filter)
2437 {
2438 	struct pneigh_entry *n;
2439 	struct net *net = sock_net(skb->sk);
2440 	int rc, h, s_h = cb->args[3];
2441 	int idx, s_idx = idx = cb->args[4];
2442 	unsigned int flags = NLM_F_MULTI;
2443 
2444 	if (filter->dev_idx || filter->master_idx)
2445 		flags |= NLM_F_DUMP_FILTERED;
2446 
2447 	read_lock_bh(&tbl->lock);
2448 
2449 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2450 		if (h > s_h)
2451 			s_idx = 0;
2452 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2453 			if (idx < s_idx || pneigh_net(n) != net)
2454 				goto next;
2455 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2456 			    neigh_master_filtered(n->dev, filter->master_idx))
2457 				goto next;
2458 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2459 					    cb->nlh->nlmsg_seq,
2460 					    RTM_NEWNEIGH, flags, tbl) < 0) {
2461 				read_unlock_bh(&tbl->lock);
2462 				rc = -1;
2463 				goto out;
2464 			}
2465 		next:
2466 			idx++;
2467 		}
2468 	}
2469 
2470 	read_unlock_bh(&tbl->lock);
2471 	rc = skb->len;
2472 out:
2473 	cb->args[3] = h;
2474 	cb->args[4] = idx;
2475 	return rc;
2477 }
2478 
2479 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2480 				bool strict_check,
2481 				struct neigh_dump_filter *filter,
2482 				struct netlink_ext_ack *extack)
2483 {
2484 	struct nlattr *tb[NDA_MAX + 1];
2485 	int err, i;
2486 
2487 	if (strict_check) {
2488 		struct ndmsg *ndm;
2489 
2490 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2491 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2492 			return -EINVAL;
2493 		}
2494 
2495 		ndm = nlmsg_data(nlh);
2496 		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2497 		    ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) {
2498 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2499 			return -EINVAL;
2500 		}
2501 
2502 		err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2503 					 NULL, extack);
2504 	} else {
2505 		err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2506 				  NULL, extack);
2507 	}
2508 	if (err < 0)
2509 		return err;
2510 
2511 	for (i = 0; i <= NDA_MAX; ++i) {
2512 		if (!tb[i])
2513 			continue;
2514 
2515 		/* all new attributes should require strict_check */
2516 		switch (i) {
2517 		case NDA_IFINDEX:
2518 			if (nla_len(tb[i]) != sizeof(u32)) {
2519 				NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
2520 				return -EINVAL;
2521 			}
2522 			filter->dev_idx = nla_get_u32(tb[i]);
2523 			break;
2524 		case NDA_MASTER:
2525 			if (nla_len(tb[i]) != sizeof(u32)) {
2526 				NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
2527 				return -EINVAL;
2528 			}
2529 			filter->master_idx = nla_get_u32(tb[i]);
2530 			break;
2531 		default:
2532 			if (strict_check) {
2533 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2534 				return -EINVAL;
2535 			}
2536 		}
2537 	}
2538 
2539 	return 0;
2540 }
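
/*
 * Illustrative only: under strict checking a filtered dump request is a
 * bare struct ndmsg (every header field zero except ndm_family) followed
 * by optional u32 attributes, NDA_IFINDEX to limit the dump to one
 * device, NDA_MASTER to limit it to the ports of one bridge or bond.
 */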
2541 
2542 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2543 {
2544 	const struct nlmsghdr *nlh = cb->nlh;
2545 	struct neigh_dump_filter filter = {};
2546 	struct neigh_table *tbl;
2547 	int t, family, s_t;
2548 	int proxy = 0;
2549 	int err;
2550 
2551 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2552 
2553 	/* Check whether a full ndmsg structure is present; the family
2554 	 * member sits at the same offset in both rtgenmsg and ndmsg.
2555 	 */
2556 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2557 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2558 		proxy = 1;
2559 
2560 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2561 	if (err < 0 && cb->strict_check)
2562 		return err;
2563 
2564 	s_t = cb->args[0];
2565 
2566 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2567 		tbl = neigh_tables[t];
2568 
2569 		if (!tbl)
2570 			continue;
2571 		if (t < s_t || (family && tbl->family != family))
2572 			continue;
2573 		if (t > s_t)
2574 			memset(&cb->args[1], 0, sizeof(cb->args) -
2575 						sizeof(cb->args[0]));
2576 		if (proxy)
2577 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2578 		else
2579 			err = neigh_dump_table(tbl, skb, cb, &filter);
2580 		if (err < 0)
2581 			break;
2582 	}
2583 
2584 	cb->args[0] = t;
2585 	return skb->len;
2586 }
2587 
2588 void neigh_for_each(struct neigh_table *tbl,
		    void (*cb)(struct neighbour *, void *), void *cookie)
2589 {
2590 	int chain;
2591 	struct neigh_hash_table *nht;
2592 
2593 	rcu_read_lock_bh();
2594 	nht = rcu_dereference_bh(tbl->nht);
2595 
2596 	read_lock(&tbl->lock); /* avoid resizes */
2597 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2598 		struct neighbour *n;
2599 
2600 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2601 		     n != NULL;
2602 		     n = rcu_dereference_bh(n->next))
2603 			cb(n, cookie);
2604 	}
2605 	read_unlock(&tbl->lock);
2606 	rcu_read_unlock_bh();
2607 }
2608 EXPORT_SYMBOL(neigh_for_each);
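
/*
 * Example usage (hypothetical caller, not part of this file).  The
 * callback runs with tbl->lock read-held and BHs disabled, so it must not
 * sleep or call back into the neighbour core:
 *
 *	static void neigh_count_cb(struct neighbour *n, void *cookie)
 *	{
 *		(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int count = 0;
 *	neigh_for_each(&arp_tbl, neigh_count_cb, &count);
 */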
2609 
2610 /* The tbl->lock must be held as a writer and BH disabled. */
2611 void __neigh_for_each_release(struct neigh_table *tbl,
2612 			      int (*cb)(struct neighbour *))
2613 {
2614 	int chain;
2615 	struct neigh_hash_table *nht;
2616 
2617 	nht = rcu_dereference_protected(tbl->nht,
2618 					lockdep_is_held(&tbl->lock));
2619 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2620 		struct neighbour *n;
2621 		struct neighbour __rcu **np;
2622 
2623 		np = &nht->hash_buckets[chain];
2624 		while ((n = rcu_dereference_protected(*np,
2625 					lockdep_is_held(&tbl->lock))) != NULL) {
2626 			int release;
2627 
2628 			write_lock(&n->lock);
2629 			release = cb(n);
2630 			if (release) {
2631 				rcu_assign_pointer(*np,
2632 					rcu_dereference_protected(n->next,
2633 						lockdep_is_held(&tbl->lock)));
2634 				n->dead = 1;
2635 			} else {
2636 				np = &n->next;
			}
2637 			write_unlock(&n->lock);
2638 			if (release)
2639 				neigh_cleanup_and_release(n);
2640 		}
2641 	}
2642 }
2643 EXPORT_SYMBOL(__neigh_for_each_release);
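
/*
 * Example usage (hypothetical): flushing every entry of a table.  The
 * callback returns nonzero to have the entry unlinked, marked dead and
 * released:
 *
 *	static int neigh_flush_cb(struct neighbour *n)
 *	{
 *		return 1;
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, neigh_flush_cb);
 *	write_unlock_bh(&tbl->lock);
 */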
2644 
2645 int neigh_xmit(int index, struct net_device *dev,
2646 	       const void *addr, struct sk_buff *skb)
2647 {
2648 	int err = -EAFNOSUPPORT;
2649 	if (likely(index < NEIGH_NR_TABLES)) {
2650 		struct neigh_table *tbl;
2651 		struct neighbour *neigh;
2652 
2653 		tbl = neigh_tables[index];
2654 		if (!tbl)
2655 			goto out;
2656 		rcu_read_lock_bh();
2657 		neigh = __neigh_lookup_noref(tbl, addr, dev);
2658 		if (!neigh)
2659 			neigh = __neigh_create(tbl, addr, dev, false);
2660 		err = PTR_ERR(neigh);
2661 		if (IS_ERR(neigh)) {
2662 			rcu_read_unlock_bh();
2663 			goto out_kfree_skb;
2664 		}
2665 		err = neigh->output(neigh, skb);
2666 		rcu_read_unlock_bh();
2667 	} else if (index == NEIGH_LINK_TABLE) {
2669 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2670 				      addr, NULL, skb->len);
2671 		if (err < 0)
2672 			goto out_kfree_skb;
2673 		err = dev_queue_xmit(skb);
2674 	}
2675 out:
2676 	return err;
2677 out_kfree_skb:
2678 	kfree_skb(skb);
2679 	goto out;
2680 }
2681 EXPORT_SYMBOL(neigh_xmit);
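
/*
 * Example usage (hypothetical): transmitting through the IPv4 ARP table.
 * The index is one of the NEIGH_*_TABLE constants that key neigh_tables[],
 * and @addr is the next-hop address in the table's key format, a __be32
 * for ARP (192.0.2.1 below):
 *
 *	__be32 nexthop = htonl(0xc0000201);
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &nexthop, skb);
 */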
2682 
2683 #ifdef CONFIG_PROC_FS
2684 
2685 static struct neighbour *neigh_get_first(struct seq_file *seq)
2686 {
2687 	struct neigh_seq_state *state = seq->private;
2688 	struct net *net = seq_file_net(seq);
2689 	struct neigh_hash_table *nht = state->nht;
2690 	struct neighbour *n = NULL;
2691 	int bucket = state->bucket;
2692 
2693 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2694 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2695 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2696 
2697 		while (n) {
2698 			if (!net_eq(dev_net(n->dev), net))
2699 				goto next;
2700 			if (state->neigh_sub_iter) {
2701 				loff_t fakep = 0;
2702 				void *v;
2703 
2704 				v = state->neigh_sub_iter(state, n, &fakep);
2705 				if (!v)
2706 					goto next;
2707 			}
2708 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2709 				break;
2710 			if (n->nud_state & ~NUD_NOARP)
2711 				break;
2712 next:
2713 			n = rcu_dereference_bh(n->next);
2714 		}
2715 
2716 		if (n)
2717 			break;
2718 	}
2719 	state->bucket = bucket;
2720 
2721 	return n;
2722 }
2723 
2724 static struct neighbour *neigh_get_next(struct seq_file *seq,
2725 					struct neighbour *n,
2726 					loff_t *pos)
2727 {
2728 	struct neigh_seq_state *state = seq->private;
2729 	struct net *net = seq_file_net(seq);
2730 	struct neigh_hash_table *nht = state->nht;
2731 
2732 	if (state->neigh_sub_iter) {
2733 		void *v = state->neigh_sub_iter(state, n, pos);
2734 		if (v)
2735 			return n;
2736 	}
2737 	n = rcu_dereference_bh(n->next);
2738 
2739 	while (1) {
2740 		while (n) {
2741 			if (!net_eq(dev_net(n->dev), net))
2742 				goto next;
2743 			if (state->neigh_sub_iter) {
2744 				void *v = state->neigh_sub_iter(state, n, pos);
2745 				if (v)
2746 					return n;
2747 				goto next;
2748 			}
2749 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2750 				break;
2751 
2752 			if (n->nud_state & ~NUD_NOARP)
2753 				break;
2754 next:
2755 			n = rcu_dereference_bh(n->next);
2756 		}
2757 
2758 		if (n)
2759 			break;
2760 
2761 		if (++state->bucket >= (1 << nht->hash_shift))
2762 			break;
2763 
2764 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2765 	}
2766 
2767 	if (n && pos)
2768 		--(*pos);
2769 	return n;
2770 }
2771 
2772 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2773 {
2774 	struct neighbour *n = neigh_get_first(seq);
2775 
2776 	if (n) {
2777 		--(*pos);
2778 		while (*pos) {
2779 			n = neigh_get_next(seq, n, pos);
2780 			if (!n)
2781 				break;
2782 		}
2783 	}
2784 	return *pos ? NULL : n;
2785 }
2786 
2787 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2788 {
2789 	struct neigh_seq_state *state = seq->private;
2790 	struct net *net = seq_file_net(seq);
2791 	struct neigh_table *tbl = state->tbl;
2792 	struct pneigh_entry *pn = NULL;
2793 	int bucket = state->bucket;
2794 
2795 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2796 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2797 		pn = tbl->phash_buckets[bucket];
2798 		while (pn && !net_eq(pneigh_net(pn), net))
2799 			pn = pn->next;
2800 		if (pn)
2801 			break;
2802 	}
2803 	state->bucket = bucket;
2804 
2805 	return pn;
2806 }
2807 
2808 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2809 					    struct pneigh_entry *pn,
2810 					    loff_t *pos)
2811 {
2812 	struct neigh_seq_state *state = seq->private;
2813 	struct net *net = seq_file_net(seq);
2814 	struct neigh_table *tbl = state->tbl;
2815 
2816 	do {
2817 		pn = pn->next;
2818 	} while (pn && !net_eq(pneigh_net(pn), net));
2819 
2820 	while (!pn) {
2821 		if (++state->bucket > PNEIGH_HASHMASK)
2822 			break;
2823 		pn = tbl->phash_buckets[state->bucket];
2824 		while (pn && !net_eq(pneigh_net(pn), net))
2825 			pn = pn->next;
2826 		if (pn)
2827 			break;
2828 	}
2829 
2830 	if (pn && pos)
2831 		--(*pos);
2832 
2833 	return pn;
2834 }
2835 
2836 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2837 {
2838 	struct pneigh_entry *pn = pneigh_get_first(seq);
2839 
2840 	if (pn) {
2841 		--(*pos);
2842 		while (*pos) {
2843 			pn = pneigh_get_next(seq, pn, pos);
2844 			if (!pn)
2845 				break;
2846 		}
2847 	}
2848 	return *pos ? NULL : pn;
2849 }
2850 
2851 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2852 {
2853 	struct neigh_seq_state *state = seq->private;
2854 	void *rc;
2855 	loff_t idxpos = *pos;
2856 
2857 	rc = neigh_get_idx(seq, &idxpos);
2858 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2859 		rc = pneigh_get_idx(seq, &idxpos);
2860 
2861 	return rc;
2862 }
2863 
2864 void *neigh_seq_start(struct seq_file *seq, loff_t *pos,
		      struct neigh_table *tbl, unsigned int neigh_seq_flags)
2865 	__acquires(rcu_bh)
2866 {
2867 	struct neigh_seq_state *state = seq->private;
2868 
2869 	state->tbl = tbl;
2870 	state->bucket = 0;
2871 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2872 
2873 	rcu_read_lock_bh();
2874 	state->nht = rcu_dereference_bh(tbl->nht);
2875 
2876 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2877 }
2878 EXPORT_SYMBOL(neigh_seq_start);
2879 
2880 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2881 {
2882 	struct neigh_seq_state *state;
2883 	void *rc;
2884 
2885 	if (v == SEQ_START_TOKEN) {
2886 		rc = neigh_get_first(seq);
2887 		goto out;
2888 	}
2889 
2890 	state = seq->private;
2891 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2892 		rc = neigh_get_next(seq, v, NULL);
2893 		if (rc)
2894 			goto out;
2895 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2896 			rc = pneigh_get_first(seq);
2897 	} else {
2898 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2899 		rc = pneigh_get_next(seq, v, NULL);
2900 	}
2901 out:
2902 	++(*pos);
2903 	return rc;
2904 }
2905 EXPORT_SYMBOL(neigh_seq_next);
2906 
2907 void neigh_seq_stop(struct seq_file *seq, void *v)
2908 	__releases(rcu_bh)
2909 {
2910 	rcu_read_unlock_bh();
2911 }
2912 EXPORT_SYMBOL(neigh_seq_stop);
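
/*
 * Example usage (hypothetical, ARP-style): a protocol wires these helpers
 * into its own seq_file iterator and only supplies the ->show callback:
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next() and neigh_seq_stop() used for ->next and ->stop.
 */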
2913 
2914 /* statistics via seq_file */
2915 
2916 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2917 {
2918 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2919 	int cpu;
2920 
2921 	if (*pos == 0)
2922 		return SEQ_START_TOKEN;
2923 
2924 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2925 		if (!cpu_possible(cpu))
2926 			continue;
2927 		*pos = cpu+1;
2928 		return per_cpu_ptr(tbl->stats, cpu);
2929 	}
2930 	return NULL;
2931 }
2932 
2933 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2934 {
2935 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2936 	int cpu;
2937 
2938 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2939 		if (!cpu_possible(cpu))
2940 			continue;
2941 		*pos = cpu+1;
2942 		return per_cpu_ptr(tbl->stats, cpu);
2943 	}
2944 	return NULL;
2945 }
2946 
2947 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2948 {
2950 }
2951 
2952 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2953 {
2954 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2955 	struct neigh_statistics *st = v;
2956 
2957 	if (v == SEQ_START_TOKEN) {
2958 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
2959 		return 0;
2960 	}
2961 
2962 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2963 			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
2964 		   atomic_read(&tbl->entries),
2965 
2966 		   st->allocs,
2967 		   st->destroys,
2968 		   st->hash_grows,
2969 
2970 		   st->lookups,
2971 		   st->hits,
2972 
2973 		   st->res_failed,
2974 
2975 		   st->rcv_probes_mcast,
2976 		   st->rcv_probes_ucast,
2977 
2978 		   st->periodic_gc_runs,
2979 		   st->forced_gc_runs,
2980 		   st->unres_discards,
2981 		   st->table_fulls
2982 		   );
2983 
2984 	return 0;
2985 }
2986 
2987 static const struct seq_operations neigh_stat_seq_ops = {
2988 	.start	= neigh_stat_seq_start,
2989 	.next	= neigh_stat_seq_next,
2990 	.stop	= neigh_stat_seq_stop,
2991 	.show	= neigh_stat_seq_show,
2992 };
2993 #endif /* CONFIG_PROC_FS */
2994 
2995 static inline size_t neigh_nlmsg_size(void)
2996 {
2997 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2998 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2999 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3000 	       + nla_total_size(sizeof(struct nda_cacheinfo))
3001 	       + nla_total_size(4); /* NDA_PROBES */
3002 }
3003 
3004 static void __neigh_notify(struct neighbour *n, int type, int flags,
3005 			   u32 pid)
3006 {
3007 	struct net *net = dev_net(n->dev);
3008 	struct sk_buff *skb;
3009 	int err = -ENOBUFS;
3010 
3011 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3012 	if (skb == NULL)
3013 		goto errout;
3014 
3015 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
3016 	if (err < 0) {
3017 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3018 		WARN_ON(err == -EMSGSIZE);
3019 		kfree_skb(skb);
3020 		goto errout;
3021 	}
3022 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3023 	return;
3024 errout:
3025 	if (err < 0)
3026 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3027 }
3028 
3029 void neigh_app_ns(struct neighbour *n)
3030 {
3031 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3032 }
3033 EXPORT_SYMBOL(neigh_app_ns);
3034 
3035 #ifdef CONFIG_SYSCTL
3036 static int zero;
3037 static int int_max = INT_MAX;
3038 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3039 
3040 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3041 			   void __user *buffer, size_t *lenp, loff_t *ppos)
3042 {
3043 	int size, ret;
3044 	struct ctl_table tmp = *ctl;
3045 
3046 	tmp.extra1 = &zero;
3047 	tmp.extra2 = &unres_qlen_max;
3048 	tmp.data = &size;
3049 
3050 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3051 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3052 
3053 	if (write && !ret)
3054 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3055 	return ret;
3056 }
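
/*
 * Worked example (added commentary, assuming the usual SKB_TRUESIZE
 * definition: X plus the aligned sk_buff and skb_shared_info overhead):
 * writing unres_qlen = N stores N * SKB_TRUESIZE(ETH_FRAME_LEN) into
 * unres_qlen_bytes, and a read divides back by the same factor, so the
 * legacy packet-count view and the byte view stay mutually consistent
 * up to rounding.
 */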
3057 
3058 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3059 						   int family)
3060 {
3061 	switch (family) {
3062 	case AF_INET:
3063 		return __in_dev_arp_parms_get_rcu(dev);
3064 	case AF_INET6:
3065 		return __in6_dev_nd_parms_get_rcu(dev);
3066 	}
3067 	return NULL;
3068 }
3069 
3070 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3071 				  int index)
3072 {
3073 	struct net_device *dev;
3074 	int family = neigh_parms_family(p);
3075 
3076 	rcu_read_lock();
3077 	for_each_netdev_rcu(net, dev) {
3078 		struct neigh_parms *dst_p =
3079 				neigh_get_dev_parms_rcu(dev, family);
3080 
3081 		if (dst_p && !test_bit(index, dst_p->data_state))
3082 			dst_p->data[index] = p->data[index];
3083 	}
3084 	rcu_read_unlock();
3085 }
3086 
3087 static void neigh_proc_update(struct ctl_table *ctl, int write)
3088 {
3089 	struct net_device *dev = ctl->extra1;
3090 	struct neigh_parms *p = ctl->extra2;
3091 	struct net *net = neigh_parms_net(p);
3092 	int index = (int *) ctl->data - p->data;
3093 
3094 	if (!write)
3095 		return;
3096 
3097 	set_bit(index, p->data_state);
3098 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3099 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3100 	if (!dev) /* NULL dev means this is default value */
3101 		neigh_copy_dflt_parms(net, p, index);
3102 }
3103 
3104 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3105 					   void __user *buffer,
3106 					   size_t *lenp, loff_t *ppos)
3107 {
3108 	struct ctl_table tmp = *ctl;
3109 	int ret;
3110 
3111 	tmp.extra1 = &zero;
3112 	tmp.extra2 = &int_max;
3113 
3114 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3115 	neigh_proc_update(ctl, write);
3116 	return ret;
3117 }
3118 
3119 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3120 			void __user *buffer, size_t *lenp, loff_t *ppos)
3121 {
3122 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3123 
3124 	neigh_proc_update(ctl, write);
3125 	return ret;
3126 }
3127 EXPORT_SYMBOL(neigh_proc_dointvec);
3128 
3129 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3130 				void __user *buffer,
3131 				size_t *lenp, loff_t *ppos)
3132 {
3133 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3134 
3135 	neigh_proc_update(ctl, write);
3136 	return ret;
3137 }
3138 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3139 
3140 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3141 					      void __user *buffer,
3142 					      size_t *lenp, loff_t *ppos)
3143 {
3144 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3145 
3146 	neigh_proc_update(ctl, write);
3147 	return ret;
3148 }
3149 
3150 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3151 				   void __user *buffer,
3152 				   size_t *lenp, loff_t *ppos)
3153 {
3154 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3155 
3156 	neigh_proc_update(ctl, write);
3157 	return ret;
3158 }
3159 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3160 
3161 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3162 					  void __user *buffer,
3163 					  size_t *lenp, loff_t *ppos)
3164 {
3165 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3166 
3167 	neigh_proc_update(ctl, write);
3168 	return ret;
3169 }
3170 
3171 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3172 					  void __user *buffer,
3173 					  size_t *lenp, loff_t *ppos)
3174 {
3175 	struct neigh_parms *p = ctl->extra2;
3176 	int ret;
3177 
3178 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3179 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3180 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3181 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3182 	else
3183 		ret = -1;
3184 
3185 	if (write && ret == 0) {
3186 		/* Update reachable_time as well; otherwise the change
3187 		 * only takes effect after neigh_periodic_work next
3188 		 * recomputes it.
3189 		 */
3190 		p->reachable_time =
3191 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3192 	}
3193 	return ret;
3194 }
3195 
3196 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3197 	(&((struct neigh_parms *) 0)->data[index])
3198 
3199 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3200 	[NEIGH_VAR_ ## attr] = { \
3201 		.procname	= name, \
3202 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3203 		.maxlen		= sizeof(int), \
3204 		.mode		= mval, \
3205 		.proc_handler	= proc, \
3206 	}
3207 
3208 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3209 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3210 
3211 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3212 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3213 
3214 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3215 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3216 
3217 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3218 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3219 
3220 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3221 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3222 
3223 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3224 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3225 
3226 static struct neigh_sysctl_table {
3227 	struct ctl_table_header *sysctl_header;
3228 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3229 } neigh_sysctl_template __read_mostly = {
3230 	.neigh_vars = {
3231 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3232 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3233 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3234 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3235 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3236 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3237 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3238 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3239 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3240 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3241 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3242 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3243 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3244 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3245 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3246 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3247 		[NEIGH_VAR_GC_INTERVAL] = {
3248 			.procname	= "gc_interval",
3249 			.maxlen		= sizeof(int),
3250 			.mode		= 0644,
3251 			.proc_handler	= proc_dointvec_jiffies,
3252 		},
3253 		[NEIGH_VAR_GC_THRESH1] = {
3254 			.procname	= "gc_thresh1",
3255 			.maxlen		= sizeof(int),
3256 			.mode		= 0644,
3257 			.extra1		= &zero,
3258 			.extra2		= &int_max,
3259 			.proc_handler	= proc_dointvec_minmax,
3260 		},
3261 		[NEIGH_VAR_GC_THRESH2] = {
3262 			.procname	= "gc_thresh2",
3263 			.maxlen		= sizeof(int),
3264 			.mode		= 0644,
3265 			.extra1		= &zero,
3266 			.extra2		= &int_max,
3267 			.proc_handler	= proc_dointvec_minmax,
3268 		},
3269 		[NEIGH_VAR_GC_THRESH3] = {
3270 			.procname	= "gc_thresh3",
3271 			.maxlen		= sizeof(int),
3272 			.mode		= 0644,
3273 			.extra1		= &zero,
3274 			.extra2		= &int_max,
3275 			.proc_handler	= proc_dointvec_minmax,
3276 		},
3277 		{},
3278 	},
3279 };
3280 
3281 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3282 			  proc_handler *handler)
3283 {
3284 	int i;
3285 	struct neigh_sysctl_table *t;
3286 	const char *dev_name_source;
3287 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
3288 	char *p_name;
3289 
3290 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3291 	if (!t)
3292 		goto err;
3293 
3294 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3295 		t->neigh_vars[i].data += (long) p;
3296 		t->neigh_vars[i].extra1 = dev;
3297 		t->neigh_vars[i].extra2 = p;
3298 	}
3299 
3300 	if (dev) {
3301 		dev_name_source = dev->name;
3302 		/* Terminate the table early */
3303 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3304 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3305 	} else {
3306 		struct neigh_table *tbl = p->tbl;
3307 		dev_name_source = "default";
3308 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3309 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3310 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3311 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3312 	}
3313 
3314 	if (handler) {
3315 		/* RetransTime */
3316 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3317 		/* ReachableTime */
3318 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3319 		/* RetransTime (in milliseconds)*/
3320 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3321 		/* ReachableTime (in milliseconds) */
3322 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3323 	} else {
3324 		/* These handlers update p->reachable_time after
3325 		 * base_reachable_time(_ms) is set, so the new interval takes
3326 		 * effect on the next neighbour update instead of waiting for
3327 		 * neigh_periodic_work to recompute it (which can take several
3328 		 * minutes).  Any handler that replaces them should do the same.
3329 		 */
3330 		/* ReachableTime */
3331 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3332 			neigh_proc_base_reachable_time;
3333 		/* ReachableTime (in milliseconds) */
3334 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3335 			neigh_proc_base_reachable_time;
3336 	}
3337 
3338 	/* Don't export sysctls to unprivileged users */
3339 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
3340 		t->neigh_vars[0].procname = NULL;
3341 
3342 	switch (neigh_parms_family(p)) {
3343 	case AF_INET:
3344 		p_name = "ipv4";
3345 		break;
3346 	case AF_INET6:
3347 		p_name = "ipv6";
3348 		break;
3349 	default:
3350 		BUG();
3351 	}
3352 
3353 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3354 		p_name, dev_name_source);
3355 	t->sysctl_header =
3356 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3357 	if (!t->sysctl_header)
3358 		goto free;
3359 
3360 	p->sysctl_table = t;
3361 	return 0;
3362 
3363 free:
3364 	kfree(t);
3365 err:
3366 	return -ENOBUFS;
3367 }
3368 EXPORT_SYMBOL(neigh_sysctl_register);
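
/*
 * Example usage (hypothetical caller): IPv4 registers its per-device ARP
 * parameters along these lines, producing entries under
 * /proc/sys/net/ipv4/neigh/<dev>/:
 *
 *	err = neigh_sysctl_register(dev, idev->arp_parms, NULL);
 *
 * Passing dev == NULL registers the "default" directory instead and also
 * exposes the table-global gc_interval and gc_thresh1..3 knobs.
 */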
3369 
3370 void neigh_sysctl_unregister(struct neigh_parms *p)
3371 {
3372 	if (p->sysctl_table) {
3373 		struct neigh_sysctl_table *t = p->sysctl_table;
3374 		p->sysctl_table = NULL;
3375 		unregister_net_sysctl_table(t->sysctl_header);
3376 		kfree(t);
3377 	}
3378 }
3379 EXPORT_SYMBOL(neigh_sysctl_unregister);
3380 
3381 #endif	/* CONFIG_SYSCTL */
3382 
3383 static int __init neigh_init(void)
3384 {
3385 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3386 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3387 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3388 
3389 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3390 		      0);
3391 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3392 
3393 	return 0;
3394 }
3395 
3396 subsys_initcall(neigh_init);
3397