// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     It will result in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
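
/*
 * Illustrative sketch (added here, not part of the original file): the
 * rules above imply the following pattern for doing non-trivial work on
 * an entry found while scanning the table -- pin it, drop the table
 * lock, and only then act:
 *
 *	write_lock_bh(&tbl->lock);
 *	// ... locate 'n' in a hash bucket ...
 *	neigh_hold(n);			// take a reference
 *	write_unlock_bh(&tbl->lock);
 *	// ... now safe to call into drivers or send to the network ...
 *	neigh_release(n);		// drop the reference when done
 */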

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
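
/*
 * Worked example (added for illustration, not in the original):
 * get_random_u32_below(base) returns a value in [0, base), so the result
 * above is uniform over [base/2, 3*base/2).  E.g. with base = 30 * HZ:
 *
 *	t = neigh_rand_reach_time(30 * HZ);
 *	// t lies somewhere in [15 * HZ, 45 * HZ)
 */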

static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
	if (!list_empty(&n->managed_list))
		list_del_init(&n->managed_list);
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_managed_list(struct neighbour *n)
{
	bool on_managed_list, add_to_managed;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	add_to_managed = n->flags & NTF_MANAGED;
	on_managed_list = !list_empty(&n->managed_list);

	if (!add_to_managed && on_managed_list)
		list_del_init(&n->managed_list);
	else if (add_to_managed && !on_managed_list)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 ndm_flags, old_flags = neigh->flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;

	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
		if (ndm_flags & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}
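
/*
 * Illustrative note (added, not in the original): the shift above maps a
 * 32-bit hash to a bucket index by keeping its top hash_shift bits, i.e.
 * an index in [0, 1 << hash_shift).  For example, with hash_shift == 3:
 *
 *	u32 idx = hash_val >> (32 - 3);	// top 3 bits, 8 buckets
 */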

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
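
/*
 * Illustrative note (added, not in the original): a forced GC run removes
 * at most gc_entries - gc_thresh2 entries, and only unreferenced ones
 * (refcnt == 1) that are FAILED, NOARP, multicast, or not updated within
 * the last 5 seconds -- so it trims the cache back toward gc_thresh2
 * without touching entries that are still in active use.
 */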

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Use a safe distance from the jiffies - LONG_MAX point while the
	 * timer is running in DELAY/PROBE state, but still show user space
	 * large times in the past.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_parms_qlen_dec(struct net_device *dev, int family)
{
	struct neigh_parms *p;

	rcu_read_lock();
	p = neigh_get_dev_parms_rcu(dev, family);
	if (p)
		p->qlen--;
	rcu_read_unlock();
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
			       int family)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		struct net_device *dev = skb->dev;

		if (net == NULL || net_eq(dev_net(dev), net)) {
			neigh_parms_qlen_dec(dev, family);
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
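
/*
 * Illustrative note (added, not in the original): pneigh_queue_purge()
 * uses the common two-phase pattern -- collect the victims onto a
 * private list under the queue spinlock, then free them after the lock
 * is dropped, so kfree_skb()/dev_put() never run with the lock held:
 *
 *	spin_lock_irqsave(&list->lock, flags);
 *	// ... __skb_unlink() matching skbs onto 'tmp' ...
 *	spin_unlock_irqrestore(&list->lock, flags);
 *	while ((skb = __skb_dequeue(&tmp)))
 *		kfree_skb(skb);
 */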

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* This is the most unpleasant situation:
				   we must destroy the neighbour entry,
				   but someone still uses it.

				   The destruction will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				WRITE_ONCE(n->output, neigh_blackhole);
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
			   tbl->family);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->flags	  = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
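
/*
 * Illustrative note (added, not in the original): the threshold logic
 * above means allocation triggers a synchronous GC once gc_thresh2 is
 * exceeded (at most once per 5 seconds) and fails outright at gc_thresh3
 * if GC frees nothing.  With the common IPv4 ARP defaults -- assumed
 * here, not taken from this file -- that is roughly:
 *
 *	gc_thresh1 = 128;	// below this, GC leaves the table alone
 *	gc_thresh2 = 512;	// soft cap: forced GC above this
 *	gc_thresh3 = 1024;	// hard cap: new entries fail above this
 */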

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
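
/*
 * Illustrative note (added, not in the original): the grow path above is
 * a classic RCU resize -- writers relink every entry into the new table
 * under tbl->lock, publish it with rcu_assign_pointer(tbl->nht, ...),
 * and defer freeing the old bucket array with call_rcu(), so lockless
 * readers still walking the old table remain safe.
 */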

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
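
/*
 * Worked example (added for illustration, not in the original): for an
 * IPv4 key (key_len == 4), pkey + key_len - 4 is the address itself, so
 * the function xor-folds the address's upper bits down and keeps the low
 * nibble, selecting one of the 16 proxy hash buckets:
 *
 *	h = addr ^ (addr >> 16);
 *	h ^= h >> 8;
 *	h ^= h >> 4;
 *	h &= 0xF;		// PNEIGH_HASHMASK
 */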

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		netdev_put(dev, &n->dev_tracker);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			netdev_put(n->dev, &n->dev_tracker);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		netdev_put(n->dev, &n->dev_tracker);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/* The neighbour must already be out of the table. */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	netdev_put(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable the fast path.

   Called with the neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->output);
}

/* Neighbour state is OK;
   enable the fast path.

   Called with the neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from the random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release the lock here, even if the hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
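
/*
 * Worked example (added for illustration, not in the original): with the
 * typical defaults -- assumed here, not defined in this file -- of
 * ucast_solicit == 3, app_solicit == 0 and mcast_solicit == 3, an entry
 * in NUD_INCOMPLETE gets 3 + 0 + 3 = 6 probes before being declared
 * NUD_FAILED, while an entry re-verified in NUD_PROBE uses
 * MCAST_REPROBES instead of MCAST_PROBES for the multicast component.
 */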

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. report_unreachable() is a very
	   complicated routine. In particular, it can hit the same
	   neighbour entry!

	   So we try to be careful and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		WRITE_ONCE(neigh->nud_state, NUD_FAILED);
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
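
/*
 * Illustrative summary (added, not in the original) of the transitions
 * driven by the timer above:
 *
 *	REACHABLE --(reachable_time expired)--> DELAY or STALE
 *	DELAY --(confirmed within DELAY_PROBE_TIME)--> REACHABLE
 *	DELAY --(no confirmation in time)--> PROBE
 *	INCOMPLETE/PROBE --(probes >= neigh_max_probes())--> FAILED
 */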

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect the existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known as
				a router.

   The caller MUST hold a reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		new = old & ~NUD_PERMANENT;
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one */
	if (!dev->addr_len) {
		/* First case: the device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * receive an ARP packet, even if it doesn't change the IP-to-MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with no-op updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		WRITE_ONCE(neigh->nud_state, new);
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid a dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			READ_ONCE(n1->output)(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);
	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);
	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);
	trace_neigh_update_done(neigh, err);
	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
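
/*
 * Illustrative note (added, not in the original): the do/while above is
 * the standard seqlock reader pattern for neigh->ha -- if a writer
 * updates the address between read_seqbegin() and read_seqretry(), the
 * header is simply rebuilt.  A minimal sketch of the same idiom, with
 * 'ha_copy' being a hypothetical local buffer:
 *
 *	do {
 *		seq = read_seqbegin(&n->ha_lock);
 *		memcpy(ha_copy, n->ha, dev->addr_len);
 *	} while (read_seqretry(&n->ha_lock, seq));
 */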

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_managed_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       managed_work.work);
	struct neighbour *neigh;

	write_lock_bh(&tbl->lock);
	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
		neigh_event_send_probe(neigh, NULL, false);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
	write_unlock_bh(&tbl->lock);
}

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			neigh_parms_qlen_dec(dev, tbl->family);
			__skb_unlink(skb, &tbl->proxy_queue);

			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

static unsigned long neigh_proxy_delay(struct neigh_parms *p)
{
	/* If proxy_delay is zero, do not call get_random_u32_below()
	 * as it is undefined behavior.
	 */
	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);

	return proxy_delay ?
	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = neigh_proxy_delay(p);

	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	p->qlen++;
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		p->qlen = 0;
		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			netdev_put(dev, &p->dev_tracker);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	netdev_put(parms->dev, &parms->dev_tracker);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
	tbl->parms.qlen = 0;

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);

	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
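
/*
 * Usage sketch (added for illustration, not in the original): a protocol
 * registers its table once at init time; e.g. the IPv4 ARP code does
 * roughly
 *
 *	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
 *
 * where arp_tbl has .family, .key_len, .hash, .constructor etc. filled in.
 */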
1823 
1824 int neigh_table_clear(int index, struct neigh_table *tbl)
1825 {
1826 	neigh_tables[index] = NULL;
1827 	/* It is not clean... Fix it to unload IPv6 module safely */
1828 	cancel_delayed_work_sync(&tbl->managed_work);
1829 	cancel_delayed_work_sync(&tbl->gc_work);
1830 	del_timer_sync(&tbl->proxy_timer);
1831 	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1832 	neigh_ifdown(tbl, NULL);
1833 	if (atomic_read(&tbl->entries))
1834 		pr_crit("neighbour leakage\n");
1835 
1836 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1837 		 neigh_hash_free_rcu);
1838 	tbl->nht = NULL;
1839 
1840 	kfree(tbl->phash_buckets);
1841 	tbl->phash_buckets = NULL;
1842 
1843 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1844 
1845 	free_percpu(tbl->stats);
1846 	tbl->stats = NULL;
1847 
1848 	return 0;
1849 }
1850 EXPORT_SYMBOL(neigh_table_clear);
1851 
1852 static struct neigh_table *neigh_find_table(int family)
1853 {
1854 	struct neigh_table *tbl = NULL;
1855 
1856 	switch (family) {
1857 	case AF_INET:
1858 		tbl = neigh_tables[NEIGH_ARP_TABLE];
1859 		break;
1860 	case AF_INET6:
1861 		tbl = neigh_tables[NEIGH_ND_TABLE];
1862 		break;
1863 	}
1864 
1865 	return tbl;
1866 }
1867 
1868 const struct nla_policy nda_policy[NDA_MAX+1] = {
1869 	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
1870 	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1871 	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1872 	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
1873 	[NDA_PROBES]		= { .type = NLA_U32 },
1874 	[NDA_VLAN]		= { .type = NLA_U16 },
1875 	[NDA_PORT]		= { .type = NLA_U16 },
1876 	[NDA_VNI]		= { .type = NLA_U32 },
1877 	[NDA_IFINDEX]		= { .type = NLA_U32 },
1878 	[NDA_MASTER]		= { .type = NLA_U32 },
1879 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
1880 	[NDA_NH_ID]		= { .type = NLA_U32 },
1881 	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1882 	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
1883 };
1884 
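/* RTM_DELNEIGH handler.  NTF_PROXY entries are removed from the proxy
 * hash; ordinary entries are pushed to NUD_FAILED via __neigh_update()
 * and then unlinked from the hash table under tbl->lock.
 */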
1885 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1886 			struct netlink_ext_ack *extack)
1887 {
1888 	struct net *net = sock_net(skb->sk);
1889 	struct ndmsg *ndm;
1890 	struct nlattr *dst_attr;
1891 	struct neigh_table *tbl;
1892 	struct neighbour *neigh;
1893 	struct net_device *dev = NULL;
1894 	int err = -EINVAL;
1895 
1896 	ASSERT_RTNL();
1897 	if (nlmsg_len(nlh) < sizeof(*ndm))
1898 		goto out;
1899 
1900 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1901 	if (!dst_attr) {
1902 		NL_SET_ERR_MSG(extack, "Network address not specified");
1903 		goto out;
1904 	}
1905 
1906 	ndm = nlmsg_data(nlh);
1907 	if (ndm->ndm_ifindex) {
1908 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1909 		if (dev == NULL) {
1910 			err = -ENODEV;
1911 			goto out;
1912 		}
1913 	}
1914 
1915 	tbl = neigh_find_table(ndm->ndm_family);
1916 	if (tbl == NULL)
1917 		return -EAFNOSUPPORT;
1918 
1919 	if (nla_len(dst_attr) < (int)tbl->key_len) {
1920 		NL_SET_ERR_MSG(extack, "Invalid network address");
1921 		goto out;
1922 	}
1923 
1924 	if (ndm->ndm_flags & NTF_PROXY) {
1925 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1926 		goto out;
1927 	}
1928 
1929 	if (dev == NULL)
1930 		goto out;
1931 
1932 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1933 	if (neigh == NULL) {
1934 		err = -ENOENT;
1935 		goto out;
1936 	}
1937 
1938 	err = __neigh_update(neigh, NULL, NUD_FAILED,
1939 			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1940 			     NETLINK_CB(skb).portid, extack);
1941 	write_lock_bh(&tbl->lock);
1942 	neigh_release(neigh);
1943 	neigh_remove_one(neigh, tbl);
1944 	write_unlock_bh(&tbl->lock);
1945 
1946 out:
1947 	return err;
1948 }
1949 
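/* RTM_NEWNEIGH handler.  NLM_F_CREATE is required to create a missing
 * entry and NLM_F_EXCL rejects an existing one; without NLM_F_REPLACE,
 * the update will not override an existing valid link-layer address.
 * NTF_PROXY requests create proxy (pneigh) entries instead.
 */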
1950 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1951 		     struct netlink_ext_ack *extack)
1952 {
1953 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1954 		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1955 	struct net *net = sock_net(skb->sk);
1956 	struct ndmsg *ndm;
1957 	struct nlattr *tb[NDA_MAX+1];
1958 	struct neigh_table *tbl;
1959 	struct net_device *dev = NULL;
1960 	struct neighbour *neigh;
1961 	void *dst, *lladdr;
1962 	u8 protocol = 0;
1963 	u32 ndm_flags;
1964 	int err;
1965 
1966 	ASSERT_RTNL();
1967 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1968 				     nda_policy, extack);
1969 	if (err < 0)
1970 		goto out;
1971 
1972 	err = -EINVAL;
1973 	if (!tb[NDA_DST]) {
1974 		NL_SET_ERR_MSG(extack, "Network address not specified");
1975 		goto out;
1976 	}
1977 
1978 	ndm = nlmsg_data(nlh);
1979 	ndm_flags = ndm->ndm_flags;
1980 	if (tb[NDA_FLAGS_EXT]) {
1981 		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
1982 
1983 		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
1984 			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
1985 			      hweight32(NTF_EXT_MASK)));
1986 		ndm_flags |= (ext << NTF_EXT_SHIFT);
1987 	}
1988 	if (ndm->ndm_ifindex) {
1989 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1990 		if (dev == NULL) {
1991 			err = -ENODEV;
1992 			goto out;
1993 		}
1994 
1995 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1996 			NL_SET_ERR_MSG(extack, "Invalid link address");
1997 			goto out;
1998 		}
1999 	}
2000 
2001 	tbl = neigh_find_table(ndm->ndm_family);
2002 	if (tbl == NULL)
2003 		return -EAFNOSUPPORT;
2004 
2005 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
2006 		NL_SET_ERR_MSG(extack, "Invalid network address");
2007 		goto out;
2008 	}
2009 
2010 	dst = nla_data(tb[NDA_DST]);
2011 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2012 
2013 	if (tb[NDA_PROTOCOL])
2014 		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
2015 	if (ndm_flags & NTF_PROXY) {
2016 		struct pneigh_entry *pn;
2017 
2018 		if (ndm_flags & NTF_MANAGED) {
2019 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2020 			goto out;
2021 		}
2022 
2023 		err = -ENOBUFS;
2024 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
2025 		if (pn) {
2026 			pn->flags = ndm_flags;
2027 			if (protocol)
2028 				pn->protocol = protocol;
2029 			err = 0;
2030 		}
2031 		goto out;
2032 	}
2033 
2034 	if (!dev) {
2035 		NL_SET_ERR_MSG(extack, "Device not specified");
2036 		goto out;
2037 	}
2038 
2039 	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2040 		err = -EINVAL;
2041 		goto out;
2042 	}
2043 
2044 	neigh = neigh_lookup(tbl, dst, dev);
2045 	if (neigh == NULL) {
2046 		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
2047 		bool exempt_from_gc = ndm_permanent ||
2048 				      ndm_flags & NTF_EXT_LEARNED;
2049 
2050 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2051 			err = -ENOENT;
2052 			goto out;
2053 		}
2054 		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2055 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2056 			err = -EINVAL;
2057 			goto out;
2058 		}
2059 
2060 		neigh = ___neigh_create(tbl, dst, dev,
2061 					ndm_flags &
2062 					(NTF_EXT_LEARNED | NTF_MANAGED),
2063 					exempt_from_gc, true);
2064 		if (IS_ERR(neigh)) {
2065 			err = PTR_ERR(neigh);
2066 			goto out;
2067 		}
2068 	} else {
2069 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
2070 			err = -EEXIST;
2071 			neigh_release(neigh);
2072 			goto out;
2073 		}
2074 
2075 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2076 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2077 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2078 	}
2079 
2080 	if (protocol)
2081 		neigh->protocol = protocol;
2082 	if (ndm_flags & NTF_EXT_LEARNED)
2083 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2084 	if (ndm_flags & NTF_ROUTER)
2085 		flags |= NEIGH_UPDATE_F_ISROUTER;
2086 	if (ndm_flags & NTF_MANAGED)
2087 		flags |= NEIGH_UPDATE_F_MANAGED;
2088 	if (ndm_flags & NTF_USE)
2089 		flags |= NEIGH_UPDATE_F_USE;
2090 
2091 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2092 			     NETLINK_CB(skb).portid, extack);
2093 	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
2094 		neigh_event_send(neigh, NULL);
2095 		err = 0;
2096 	}
2097 	neigh_release(neigh);
2098 out:
2099 	return err;
2100 }
2101 
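/* Dump one neigh_parms instance as a nested NDTA_PARMS attribute. */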
2102 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2103 {
2104 	struct nlattr *nest;
2105 
2106 	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2107 	if (nest == NULL)
2108 		return -ENOBUFS;
2109 
2110 	if ((parms->dev &&
2111 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2112 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2113 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2114 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2115 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
2116 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
2117 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2118 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2119 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2120 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
2121 			NEIGH_VAR(parms, UCAST_PROBES)) ||
2122 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
2123 			NEIGH_VAR(parms, MCAST_PROBES)) ||
2124 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2125 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
2126 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2127 			  NDTPA_PAD) ||
2128 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2129 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2130 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
2131 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2132 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2133 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2134 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2135 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2136 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2137 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2138 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2139 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2140 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
2141 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2142 	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2143 			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2144 		goto nla_put_failure;
2145 	return nla_nest_end(skb, nest);
2146 
2147 nla_put_failure:
2148 	nla_nest_cancel(skb, nest);
2149 	return -EMSGSIZE;
2150 }
2151 
2152 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2153 			      u32 pid, u32 seq, int type, int flags)
2154 {
2155 	struct nlmsghdr *nlh;
2156 	struct ndtmsg *ndtmsg;
2157 
2158 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2159 	if (nlh == NULL)
2160 		return -EMSGSIZE;
2161 
2162 	ndtmsg = nlmsg_data(nlh);
2163 
2164 	read_lock_bh(&tbl->lock);
2165 	ndtmsg->ndtm_family = tbl->family;
2166 	ndtmsg->ndtm_pad1   = 0;
2167 	ndtmsg->ndtm_pad2   = 0;
2168 
2169 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2170 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2171 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2172 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2173 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2174 		goto nla_put_failure;
2175 	{
2176 		unsigned long now = jiffies;
2177 		long flush_delta = now - tbl->last_flush;
2178 		long rand_delta = now - tbl->last_rand;
2179 		struct neigh_hash_table *nht;
2180 		struct ndt_config ndc = {
2181 			.ndtc_key_len		= tbl->key_len,
2182 			.ndtc_entry_size	= tbl->entry_size,
2183 			.ndtc_entries		= atomic_read(&tbl->entries),
2184 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
2185 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
2186 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
2187 		};
2188 
2189 		rcu_read_lock();
2190 		nht = rcu_dereference(tbl->nht);
2191 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2192 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2193 		rcu_read_unlock();
2194 
2195 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2196 			goto nla_put_failure;
2197 	}
2198 
2199 	{
2200 		int cpu;
2201 		struct ndt_stats ndst;
2202 
2203 		memset(&ndst, 0, sizeof(ndst));
2204 
2205 		for_each_possible_cpu(cpu) {
2206 			struct neigh_statistics	*st;
2207 
2208 			st = per_cpu_ptr(tbl->stats, cpu);
2209 			ndst.ndts_allocs		+= st->allocs;
2210 			ndst.ndts_destroys		+= st->destroys;
2211 			ndst.ndts_hash_grows		+= st->hash_grows;
2212 			ndst.ndts_res_failed		+= st->res_failed;
2213 			ndst.ndts_lookups		+= st->lookups;
2214 			ndst.ndts_hits			+= st->hits;
2215 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
2216 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
2217 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
2218 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
2219 			ndst.ndts_table_fulls		+= st->table_fulls;
2220 		}
2221 
2222 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2223 				  NDTA_PAD))
2224 			goto nla_put_failure;
2225 	}
2226 
2227 	BUG_ON(tbl->parms.dev);
2228 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2229 		goto nla_put_failure;
2230 
2231 	read_unlock_bh(&tbl->lock);
2232 	nlmsg_end(skb, nlh);
2233 	return 0;
2234 
2235 nla_put_failure:
2236 	read_unlock_bh(&tbl->lock);
2237 	nlmsg_cancel(skb, nlh);
2238 	return -EMSGSIZE;
2239 }
2240 
2241 static int neightbl_fill_param_info(struct sk_buff *skb,
2242 				    struct neigh_table *tbl,
2243 				    struct neigh_parms *parms,
2244 				    u32 pid, u32 seq, int type,
2245 				    unsigned int flags)
2246 {
2247 	struct ndtmsg *ndtmsg;
2248 	struct nlmsghdr *nlh;
2249 
2250 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2251 	if (nlh == NULL)
2252 		return -EMSGSIZE;
2253 
2254 	ndtmsg = nlmsg_data(nlh);
2255 
2256 	read_lock_bh(&tbl->lock);
2257 	ndtmsg->ndtm_family = tbl->family;
2258 	ndtmsg->ndtm_pad1   = 0;
2259 	ndtmsg->ndtm_pad2   = 0;
2260 
2261 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2262 	    neightbl_fill_parms(skb, parms) < 0)
2263 		goto errout;
2264 
2265 	read_unlock_bh(&tbl->lock);
2266 	nlmsg_end(skb, nlh);
2267 	return 0;
2268 errout:
2269 	read_unlock_bh(&tbl->lock);
2270 	nlmsg_cancel(skb, nlh);
2271 	return -EMSGSIZE;
2272 }
2273 
2274 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2275 	[NDTA_NAME]		= { .type = NLA_STRING },
2276 	[NDTA_THRESH1]		= { .type = NLA_U32 },
2277 	[NDTA_THRESH2]		= { .type = NLA_U32 },
2278 	[NDTA_THRESH3]		= { .type = NLA_U32 },
2279 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
2280 	[NDTA_PARMS]		= { .type = NLA_NESTED },
2281 };
2282 
2283 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2284 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
2285 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
2286 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
2287 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
2288 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
2289 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
2290 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
2291 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
2292 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
2293 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
2294 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
2295 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
2296 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2297 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2298 	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
2299 };
2300 
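/* RTM_SETNEIGHTBL handler.  The table is selected by NDTA_NAME (and,
 * optionally, family); per-device parameters are selected by
 * NDTPA_IFINDEX inside NDTA_PARMS.  The gc_thresh[1-3] and gc_interval
 * knobs may only be changed from the initial netns.
 */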
2301 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2302 			struct netlink_ext_ack *extack)
2303 {
2304 	struct net *net = sock_net(skb->sk);
2305 	struct neigh_table *tbl;
2306 	struct ndtmsg *ndtmsg;
2307 	struct nlattr *tb[NDTA_MAX+1];
2308 	bool found = false;
2309 	int err, tidx;
2310 
2311 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2312 				     nl_neightbl_policy, extack);
2313 	if (err < 0)
2314 		goto errout;
2315 
2316 	if (tb[NDTA_NAME] == NULL) {
2317 		err = -EINVAL;
2318 		goto errout;
2319 	}
2320 
2321 	ndtmsg = nlmsg_data(nlh);
2322 
2323 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2324 		tbl = neigh_tables[tidx];
2325 		if (!tbl)
2326 			continue;
2327 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2328 			continue;
2329 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2330 			found = true;
2331 			break;
2332 		}
2333 	}
2334 
2335 	if (!found)
2336 		return -ENOENT;
2337 
2338 	/*
2339 	 * We acquire tbl->lock to be nice to the periodic timers and
2340 	 * make sure they always see a consistent set of values.
2341 	 */
2342 	write_lock_bh(&tbl->lock);
2343 
2344 	if (tb[NDTA_PARMS]) {
2345 		struct nlattr *tbp[NDTPA_MAX+1];
2346 		struct neigh_parms *p;
2347 		int i, ifindex = 0;
2348 
2349 		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2350 						  tb[NDTA_PARMS],
2351 						  nl_ntbl_parm_policy, extack);
2352 		if (err < 0)
2353 			goto errout_tbl_lock;
2354 
2355 		if (tbp[NDTPA_IFINDEX])
2356 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2357 
2358 		p = lookup_neigh_parms(tbl, net, ifindex);
2359 		if (p == NULL) {
2360 			err = -ENOENT;
2361 			goto errout_tbl_lock;
2362 		}
2363 
2364 		for (i = 1; i <= NDTPA_MAX; i++) {
2365 			if (tbp[i] == NULL)
2366 				continue;
2367 
2368 			switch (i) {
2369 			case NDTPA_QUEUE_LEN:
2370 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2371 					      nla_get_u32(tbp[i]) *
2372 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2373 				break;
2374 			case NDTPA_QUEUE_LENBYTES:
2375 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2376 					      nla_get_u32(tbp[i]));
2377 				break;
2378 			case NDTPA_PROXY_QLEN:
2379 				NEIGH_VAR_SET(p, PROXY_QLEN,
2380 					      nla_get_u32(tbp[i]));
2381 				break;
2382 			case NDTPA_APP_PROBES:
2383 				NEIGH_VAR_SET(p, APP_PROBES,
2384 					      nla_get_u32(tbp[i]));
2385 				break;
2386 			case NDTPA_UCAST_PROBES:
2387 				NEIGH_VAR_SET(p, UCAST_PROBES,
2388 					      nla_get_u32(tbp[i]));
2389 				break;
2390 			case NDTPA_MCAST_PROBES:
2391 				NEIGH_VAR_SET(p, MCAST_PROBES,
2392 					      nla_get_u32(tbp[i]));
2393 				break;
2394 			case NDTPA_MCAST_REPROBES:
2395 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2396 					      nla_get_u32(tbp[i]));
2397 				break;
2398 			case NDTPA_BASE_REACHABLE_TIME:
2399 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2400 					      nla_get_msecs(tbp[i]));
2401 				/* Update reachable_time as well; otherwise the change only
2402 				 * takes effect the next time neigh_periodic_work decides to
2403 				 * recompute it (which can take multiple minutes).
2404 				 */
2405 				p->reachable_time =
2406 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2407 				break;
2408 			case NDTPA_GC_STALETIME:
2409 				NEIGH_VAR_SET(p, GC_STALETIME,
2410 					      nla_get_msecs(tbp[i]));
2411 				break;
2412 			case NDTPA_DELAY_PROBE_TIME:
2413 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2414 					      nla_get_msecs(tbp[i]));
2415 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2416 				break;
2417 			case NDTPA_INTERVAL_PROBE_TIME_MS:
2418 				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2419 					      nla_get_msecs(tbp[i]));
2420 				break;
2421 			case NDTPA_RETRANS_TIME:
2422 				NEIGH_VAR_SET(p, RETRANS_TIME,
2423 					      nla_get_msecs(tbp[i]));
2424 				break;
2425 			case NDTPA_ANYCAST_DELAY:
2426 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2427 					      nla_get_msecs(tbp[i]));
2428 				break;
2429 			case NDTPA_PROXY_DELAY:
2430 				NEIGH_VAR_SET(p, PROXY_DELAY,
2431 					      nla_get_msecs(tbp[i]));
2432 				break;
2433 			case NDTPA_LOCKTIME:
2434 				NEIGH_VAR_SET(p, LOCKTIME,
2435 					      nla_get_msecs(tbp[i]));
2436 				break;
2437 			}
2438 		}
2439 	}
2440 
2441 	err = -ENOENT;
2442 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2443 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2444 	    !net_eq(net, &init_net))
2445 		goto errout_tbl_lock;
2446 
2447 	if (tb[NDTA_THRESH1])
2448 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2449 
2450 	if (tb[NDTA_THRESH2])
2451 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2452 
2453 	if (tb[NDTA_THRESH3])
2454 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2455 
2456 	if (tb[NDTA_GC_INTERVAL])
2457 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2458 
2459 	err = 0;
2460 
2461 errout_tbl_lock:
2462 	write_unlock_bh(&tbl->lock);
2463 errout:
2464 	return err;
2465 }
2466 
2467 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2468 				    struct netlink_ext_ack *extack)
2469 {
2470 	struct ndtmsg *ndtm;
2471 
2472 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2473 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2474 		return -EINVAL;
2475 	}
2476 
2477 	ndtm = nlmsg_data(nlh);
2478 	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2479 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2480 		return -EINVAL;
2481 	}
2482 
2483 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2484 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2485 		return -EINVAL;
2486 	}
2487 
2488 	return 0;
2489 }
2490 
2491 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2492 {
2493 	const struct nlmsghdr *nlh = cb->nlh;
2494 	struct net *net = sock_net(skb->sk);
2495 	int family, tidx, nidx = 0;
2496 	int tbl_skip = cb->args[0];
2497 	int neigh_skip = cb->args[1];
2498 	struct neigh_table *tbl;
2499 
2500 	if (cb->strict_check) {
2501 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2502 
2503 		if (err < 0)
2504 			return err;
2505 	}
2506 
2507 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2508 
2509 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2510 		struct neigh_parms *p;
2511 
2512 		tbl = neigh_tables[tidx];
2513 		if (!tbl)
2514 			continue;
2515 
2516 		if (tidx < tbl_skip || (family && tbl->family != family))
2517 			continue;
2518 
2519 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2520 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2521 				       NLM_F_MULTI) < 0)
2522 			break;
2523 
2524 		nidx = 0;
2525 		p = list_next_entry(&tbl->parms, list);
2526 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2527 			if (!net_eq(neigh_parms_net(p), net))
2528 				continue;
2529 
2530 			if (nidx < neigh_skip)
2531 				goto next;
2532 
2533 			if (neightbl_fill_param_info(skb, tbl, p,
2534 						     NETLINK_CB(cb->skb).portid,
2535 						     nlh->nlmsg_seq,
2536 						     RTM_NEWNEIGHTBL,
2537 						     NLM_F_MULTI) < 0)
2538 				goto out;
2539 		next:
2540 			nidx++;
2541 		}
2542 
2543 		neigh_skip = 0;
2544 	}
2545 out:
2546 	cb->args[0] = tidx;
2547 	cb->args[1] = nidx;
2548 
2549 	return skb->len;
2550 }
2551 
2552 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2553 			   u32 pid, u32 seq, int type, unsigned int flags)
2554 {
2555 	u32 neigh_flags, neigh_flags_ext;
2556 	unsigned long now = jiffies;
2557 	struct nda_cacheinfo ci;
2558 	struct nlmsghdr *nlh;
2559 	struct ndmsg *ndm;
2560 
2561 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2562 	if (nlh == NULL)
2563 		return -EMSGSIZE;
2564 
2565 	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2566 	neigh_flags     = neigh->flags & NTF_OLD_MASK;
2567 
2568 	ndm = nlmsg_data(nlh);
2569 	ndm->ndm_family	 = neigh->ops->family;
2570 	ndm->ndm_pad1    = 0;
2571 	ndm->ndm_pad2    = 0;
2572 	ndm->ndm_flags	 = neigh_flags;
2573 	ndm->ndm_type	 = neigh->type;
2574 	ndm->ndm_ifindex = neigh->dev->ifindex;
2575 
2576 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2577 		goto nla_put_failure;
2578 
2579 	read_lock_bh(&neigh->lock);
2580 	ndm->ndm_state	 = neigh->nud_state;
2581 	if (neigh->nud_state & NUD_VALID) {
2582 		char haddr[MAX_ADDR_LEN];
2583 
2584 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2585 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2586 			read_unlock_bh(&neigh->lock);
2587 			goto nla_put_failure;
2588 		}
2589 	}
2590 
2591 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2592 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2593 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2594 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2595 	read_unlock_bh(&neigh->lock);
2596 
2597 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2598 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2599 		goto nla_put_failure;
2600 
2601 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2602 		goto nla_put_failure;
2603 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2604 		goto nla_put_failure;
2605 
2606 	nlmsg_end(skb, nlh);
2607 	return 0;
2608 
2609 nla_put_failure:
2610 	nlmsg_cancel(skb, nlh);
2611 	return -EMSGSIZE;
2612 }
2613 
2614 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2615 			    u32 pid, u32 seq, int type, unsigned int flags,
2616 			    struct neigh_table *tbl)
2617 {
2618 	u32 neigh_flags, neigh_flags_ext;
2619 	struct nlmsghdr *nlh;
2620 	struct ndmsg *ndm;
2621 
2622 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2623 	if (nlh == NULL)
2624 		return -EMSGSIZE;
2625 
2626 	neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2627 	neigh_flags     = pn->flags & NTF_OLD_MASK;
2628 
2629 	ndm = nlmsg_data(nlh);
2630 	ndm->ndm_family	 = tbl->family;
2631 	ndm->ndm_pad1    = 0;
2632 	ndm->ndm_pad2    = 0;
2633 	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
2634 	ndm->ndm_type	 = RTN_UNICAST;
2635 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2636 	ndm->ndm_state	 = NUD_NONE;
2637 
2638 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2639 		goto nla_put_failure;
2640 
2641 	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2642 		goto nla_put_failure;
2643 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2644 		goto nla_put_failure;
2645 
2646 	nlmsg_end(skb, nlh);
2647 	return 0;
2648 
2649 nla_put_failure:
2650 	nlmsg_cancel(skb, nlh);
2651 	return -EMSGSIZE;
2652 }
2653 
2654 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2655 {
2656 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2657 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2658 }
2659 
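/* Returns true if the entry must be skipped because its device's
 * master does not match the NDA_MASTER filter.
 */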
2660 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2661 {
2662 	struct net_device *master;
2663 
2664 	if (!master_idx)
2665 		return false;
2666 
2667 	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2668 
2669 	/* 0 already denotes that NDA_MASTER wasn't passed, so another invalid
2670 	 * ifindex value is needed to denote "no master".
2671 	 */
2672 	if (master_idx == -1)
2673 		return !!master;
2674 
2675 	if (!master || master->ifindex != master_idx)
2676 		return true;
2677 
2678 	return false;
2679 }
2680 
2681 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2682 {
2683 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2684 		return true;
2685 
2686 	return false;
2687 }
2688 
2689 struct neigh_dump_filter {
2690 	int master_idx;
2691 	int dev_idx;
2692 };
2693 
2694 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2695 			    struct netlink_callback *cb,
2696 			    struct neigh_dump_filter *filter)
2697 {
2698 	struct net *net = sock_net(skb->sk);
2699 	struct neighbour *n;
2700 	int rc, h, s_h = cb->args[1];
2701 	int idx, s_idx = idx = cb->args[2];
2702 	struct neigh_hash_table *nht;
2703 	unsigned int flags = NLM_F_MULTI;
2704 
2705 	if (filter->dev_idx || filter->master_idx)
2706 		flags |= NLM_F_DUMP_FILTERED;
2707 
2708 	rcu_read_lock();
2709 	nht = rcu_dereference(tbl->nht);
2710 
2711 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2712 		if (h > s_h)
2713 			s_idx = 0;
2714 		for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
2715 		     n != NULL;
2716 		     n = rcu_dereference(n->next)) {
2717 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2718 				goto next;
2719 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2720 			    neigh_master_filtered(n->dev, filter->master_idx))
2721 				goto next;
2722 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2723 					    cb->nlh->nlmsg_seq,
2724 					    RTM_NEWNEIGH,
2725 					    flags) < 0) {
2726 				rc = -1;
2727 				goto out;
2728 			}
2729 next:
2730 			idx++;
2731 		}
2732 	}
2733 	rc = skb->len;
2734 out:
2735 	rcu_read_unlock();
2736 	cb->args[1] = h;
2737 	cb->args[2] = idx;
2738 	return rc;
2739 }
2740 
2741 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2742 			     struct netlink_callback *cb,
2743 			     struct neigh_dump_filter *filter)
2744 {
2745 	struct pneigh_entry *n;
2746 	struct net *net = sock_net(skb->sk);
2747 	int rc, h, s_h = cb->args[3];
2748 	int idx, s_idx = idx = cb->args[4];
2749 	unsigned int flags = NLM_F_MULTI;
2750 
2751 	if (filter->dev_idx || filter->master_idx)
2752 		flags |= NLM_F_DUMP_FILTERED;
2753 
2754 	read_lock_bh(&tbl->lock);
2755 
2756 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2757 		if (h > s_h)
2758 			s_idx = 0;
2759 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2760 			if (idx < s_idx || pneigh_net(n) != net)
2761 				goto next;
2762 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2763 			    neigh_master_filtered(n->dev, filter->master_idx))
2764 				goto next;
2765 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2766 					    cb->nlh->nlmsg_seq,
2767 					    RTM_NEWNEIGH, flags, tbl) < 0) {
2768 				read_unlock_bh(&tbl->lock);
2769 				rc = -1;
2770 				goto out;
2771 			}
2772 		next:
2773 			idx++;
2774 		}
2775 	}
2776 
2777 	read_unlock_bh(&tbl->lock);
2778 	rc = skb->len;
2779 out:
2780 	cb->args[3] = h;
2781 	cb->args[4] = idx;
2782 	return rc;
2784 }
2785 
2786 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2787 				bool strict_check,
2788 				struct neigh_dump_filter *filter,
2789 				struct netlink_ext_ack *extack)
2790 {
2791 	struct nlattr *tb[NDA_MAX + 1];
2792 	int err, i;
2793 
2794 	if (strict_check) {
2795 		struct ndmsg *ndm;
2796 
2797 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2798 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2799 			return -EINVAL;
2800 		}
2801 
2802 		ndm = nlmsg_data(nlh);
2803 		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2804 		    ndm->ndm_state || ndm->ndm_type) {
2805 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2806 			return -EINVAL;
2807 		}
2808 
2809 		if (ndm->ndm_flags & ~NTF_PROXY) {
2810 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2811 			return -EINVAL;
2812 		}
2813 
2814 		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2815 						    tb, NDA_MAX, nda_policy,
2816 						    extack);
2817 	} else {
2818 		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2819 					     NDA_MAX, nda_policy, extack);
2820 	}
2821 	if (err < 0)
2822 		return err;
2823 
2824 	for (i = 0; i <= NDA_MAX; ++i) {
2825 		if (!tb[i])
2826 			continue;
2827 
2828 		/* all new attributes should require strict_check */
2829 		switch (i) {
2830 		case NDA_IFINDEX:
2831 			filter->dev_idx = nla_get_u32(tb[i]);
2832 			break;
2833 		case NDA_MASTER:
2834 			filter->master_idx = nla_get_u32(tb[i]);
2835 			break;
2836 		default:
2837 			if (strict_check) {
2838 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2839 				return -EINVAL;
2840 			}
2841 		}
2842 	}
2843 
2844 	return 0;
2845 }
2846 
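/* RTM_GETNEIGH dump handler.  cb->args[0] tracks the table index;
 * args[1]/args[2] hold the bucket/offset resume point for neighbour
 * dumps and args[3]/args[4] the same for proxy dumps.
 */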
2847 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2848 {
2849 	const struct nlmsghdr *nlh = cb->nlh;
2850 	struct neigh_dump_filter filter = {};
2851 	struct neigh_table *tbl;
2852 	int t, family, s_t;
2853 	int proxy = 0;
2854 	int err;
2855 
2856 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2857 
2858 	/* Check that a full ndmsg structure is present; the family member
2859 	 * sits at the same offset in both rtgenmsg and ndmsg.
2860 	 */
2861 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2862 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2863 		proxy = 1;
2864 
2865 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2866 	if (err < 0 && cb->strict_check)
2867 		return err;
2868 
2869 	s_t = cb->args[0];
2870 
2871 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2872 		tbl = neigh_tables[t];
2873 
2874 		if (!tbl)
2875 			continue;
2876 		if (t < s_t || (family && tbl->family != family))
2877 			continue;
2878 		if (t > s_t)
2879 			memset(&cb->args[1], 0, sizeof(cb->args) -
2880 						sizeof(cb->args[0]));
2881 		if (proxy)
2882 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2883 		else
2884 			err = neigh_dump_table(tbl, skb, cb, &filter);
2885 		if (err < 0)
2886 			break;
2887 	}
2888 
2889 	cb->args[0] = t;
2890 	return skb->len;
2891 }
2892 
2893 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2894 			       struct neigh_table **tbl,
2895 			       void **dst, int *dev_idx, u8 *ndm_flags,
2896 			       struct netlink_ext_ack *extack)
2897 {
2898 	struct nlattr *tb[NDA_MAX + 1];
2899 	struct ndmsg *ndm;
2900 	int err, i;
2901 
2902 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2903 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2904 		return -EINVAL;
2905 	}
2906 
2907 	ndm = nlmsg_data(nlh);
2908 	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
2909 	    ndm->ndm_type) {
2910 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2911 		return -EINVAL;
2912 	}
2913 
2914 	if (ndm->ndm_flags & ~NTF_PROXY) {
2915 		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2916 		return -EINVAL;
2917 	}
2918 
2919 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2920 					    NDA_MAX, nda_policy, extack);
2921 	if (err < 0)
2922 		return err;
2923 
2924 	*ndm_flags = ndm->ndm_flags;
2925 	*dev_idx = ndm->ndm_ifindex;
2926 	*tbl = neigh_find_table(ndm->ndm_family);
2927 	if (*tbl == NULL) {
2928 		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2929 		return -EAFNOSUPPORT;
2930 	}
2931 
2932 	for (i = 0; i <= NDA_MAX; ++i) {
2933 		if (!tb[i])
2934 			continue;
2935 
2936 		switch (i) {
2937 		case NDA_DST:
2938 			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2939 				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2940 				return -EINVAL;
2941 			}
2942 			*dst = nla_data(tb[i]);
2943 			break;
2944 		default:
2945 			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2946 			return -EINVAL;
2947 		}
2948 	}
2949 
2950 	return 0;
2951 }
2952 
2953 static inline size_t neigh_nlmsg_size(void)
2954 {
2955 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2956 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2957 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2958 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2959 	       + nla_total_size(4)  /* NDA_PROBES */
2960 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
2961 	       + nla_total_size(1); /* NDA_PROTOCOL */
2962 }
2963 
2964 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2965 			   u32 pid, u32 seq)
2966 {
2967 	struct sk_buff *skb;
2968 	int err = 0;
2969 
2970 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2971 	if (!skb)
2972 		return -ENOBUFS;
2973 
2974 	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2975 	if (err) {
2976 		kfree_skb(skb);
2977 		goto errout;
2978 	}
2979 
2980 	err = rtnl_unicast(skb, net, pid);
2981 errout:
2982 	return err;
2983 }
2984 
2985 static inline size_t pneigh_nlmsg_size(void)
2986 {
2987 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2988 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2989 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
2990 	       + nla_total_size(1); /* NDA_PROTOCOL */
2991 }
2992 
2993 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2994 			    u32 pid, u32 seq, struct neigh_table *tbl)
2995 {
2996 	struct sk_buff *skb;
2997 	int err = 0;
2998 
2999 	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3000 	if (!skb)
3001 		return -ENOBUFS;
3002 
3003 	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
3004 	if (err) {
3005 		kfree_skb(skb);
3006 		goto errout;
3007 	}
3008 
3009 	err = rtnl_unicast(skb, net, pid);
3010 errout:
3011 	return err;
3012 }
3013 
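/* RTM_GETNEIGH (non-dump) handler: look up a single neighbour or proxy
 * entry and unicast the reply to the requesting socket.
 */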
3014 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3015 		     struct netlink_ext_ack *extack)
3016 {
3017 	struct net *net = sock_net(in_skb->sk);
3018 	struct net_device *dev = NULL;
3019 	struct neigh_table *tbl = NULL;
3020 	struct neighbour *neigh;
3021 	void *dst = NULL;
3022 	u8 ndm_flags = 0;
3023 	int dev_idx = 0;
3024 	int err;
3025 
3026 	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
3027 				  extack);
3028 	if (err < 0)
3029 		return err;
3030 
3031 	if (dev_idx) {
3032 		dev = __dev_get_by_index(net, dev_idx);
3033 		if (!dev) {
3034 			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3035 			return -ENODEV;
3036 		}
3037 	}
3038 
3039 	if (!dst) {
3040 		NL_SET_ERR_MSG(extack, "Network address not specified");
3041 		return -EINVAL;
3042 	}
3043 
3044 	if (ndm_flags & NTF_PROXY) {
3045 		struct pneigh_entry *pn;
3046 
3047 		pn = pneigh_lookup(tbl, net, dst, dev, 0);
3048 		if (!pn) {
3049 			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3050 			return -ENOENT;
3051 		}
3052 		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3053 					nlh->nlmsg_seq, tbl);
3054 	}
3055 
3056 	if (!dev) {
3057 		NL_SET_ERR_MSG(extack, "No device specified");
3058 		return -EINVAL;
3059 	}
3060 
3061 	neigh = neigh_lookup(tbl, dst, dev);
3062 	if (!neigh) {
3063 		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3064 		return -ENOENT;
3065 	}
3066 
3067 	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3068 			      nlh->nlmsg_seq);
3069 
3070 	neigh_release(neigh);
3071 
3072 	return err;
3073 }
3074 
3075 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3076 {
3077 	int chain;
3078 	struct neigh_hash_table *nht;
3079 
3080 	rcu_read_lock();
3081 	nht = rcu_dereference(tbl->nht);
3082 
3083 	read_lock_bh(&tbl->lock); /* avoid resizes */
3084 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3085 		struct neighbour *n;
3086 
3087 		for (n = rcu_dereference(nht->hash_buckets[chain]);
3088 		     n != NULL;
3089 		     n = rcu_dereference(n->next))
3090 			cb(n, cookie);
3091 	}
3092 	read_unlock_bh(&tbl->lock);
3093 	rcu_read_unlock();
3094 }
3095 EXPORT_SYMBOL(neigh_for_each);
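
/*
 * Usage sketch (illustrative only; neigh_count_cb is a hypothetical
 * callback and arp_tbl is the IPv4 neighbour table).  The callback runs
 * under rcu_read_lock() and tbl->lock with BH disabled, so it must not
 * sleep or re-enter the table:
 *
 *	static void neigh_count_cb(struct neighbour *n, void *cookie)
 *	{
 *		(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int count = 0;
 *	neigh_for_each(&arp_tbl, neigh_count_cb, &count);
 */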
3096 
3097 /* The tbl->lock must be held as a writer and BH disabled. */
3098 void __neigh_for_each_release(struct neigh_table *tbl,
3099 			      int (*cb)(struct neighbour *))
3100 {
3101 	int chain;
3102 	struct neigh_hash_table *nht;
3103 
3104 	nht = rcu_dereference_protected(tbl->nht,
3105 					lockdep_is_held(&tbl->lock));
3106 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3107 		struct neighbour *n;
3108 		struct neighbour __rcu **np;
3109 
3110 		np = &nht->hash_buckets[chain];
3111 		while ((n = rcu_dereference_protected(*np,
3112 					lockdep_is_held(&tbl->lock))) != NULL) {
3113 			int release;
3114 
3115 			write_lock(&n->lock);
3116 			release = cb(n);
3117 			if (release) {
3118 				rcu_assign_pointer(*np,
3119 					rcu_dereference_protected(n->next,
3120 						lockdep_is_held(&tbl->lock)));
3121 				neigh_mark_dead(n);
3122 			} else
3123 				np = &n->next;
3124 			write_unlock(&n->lock);
3125 			if (release)
3126 				neigh_cleanup_and_release(n);
3127 		}
3128 	}
3129 }
3130 EXPORT_SYMBOL(__neigh_for_each_release);
3131 
3132 int neigh_xmit(int index, struct net_device *dev,
3133 	       const void *addr, struct sk_buff *skb)
3134 {
3135 	int err = -EAFNOSUPPORT;
3136 	if (likely(index < NEIGH_NR_TABLES)) {
3137 		struct neigh_table *tbl;
3138 		struct neighbour *neigh;
3139 
3140 		tbl = neigh_tables[index];
3141 		if (!tbl)
3142 			goto out;
3143 		rcu_read_lock();
3144 		if (index == NEIGH_ARP_TABLE) {
3145 			u32 key = *((u32 *)addr);
3146 
3147 			neigh = __ipv4_neigh_lookup_noref(dev, key);
3148 		} else {
3149 			neigh = __neigh_lookup_noref(tbl, addr, dev);
3150 		}
3151 		if (!neigh)
3152 			neigh = __neigh_create(tbl, addr, dev, false);
3153 		err = PTR_ERR(neigh);
3154 		if (IS_ERR(neigh)) {
3155 			rcu_read_unlock();
3156 			goto out_kfree_skb;
3157 		}
3158 		err = READ_ONCE(neigh->output)(neigh, skb);
3159 		rcu_read_unlock();
3160 	} else if (index == NEIGH_LINK_TABLE) {
3162 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3163 				      addr, NULL, skb->len);
3164 		if (err < 0)
3165 			goto out_kfree_skb;
3166 		err = dev_queue_xmit(skb);
3167 	}
3168 out:
3169 	return err;
3170 out_kfree_skb:
3171 	kfree_skb(skb);
3172 	goto out;
3173 }
3174 EXPORT_SYMBOL(neigh_xmit);
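
/*
 * Usage sketch (illustrative only; "nexthop" stands for a hypothetical
 * IPv4 next-hop address in network byte order):
 *
 *	__be32 nexthop = htonl(0xc0a80001);	(192.168.0.1)
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &nexthop, skb);
 *
 * On header or resolution errors the skb is freed by neigh_xmit()
 * itself, so the caller must not free it again.
 */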
3175 
3176 #ifdef CONFIG_PROC_FS
3177 
3178 static struct neighbour *neigh_get_first(struct seq_file *seq)
3179 {
3180 	struct neigh_seq_state *state = seq->private;
3181 	struct net *net = seq_file_net(seq);
3182 	struct neigh_hash_table *nht = state->nht;
3183 	struct neighbour *n = NULL;
3184 	int bucket;
3185 
3186 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3187 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3188 		n = rcu_dereference(nht->hash_buckets[bucket]);
3189 
3190 		while (n) {
3191 			if (!net_eq(dev_net(n->dev), net))
3192 				goto next;
3193 			if (state->neigh_sub_iter) {
3194 				loff_t fakep = 0;
3195 				void *v;
3196 
3197 				v = state->neigh_sub_iter(state, n, &fakep);
3198 				if (!v)
3199 					goto next;
3200 			}
3201 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3202 				break;
3203 			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3204 				break;
3205 next:
3206 			n = rcu_dereference(n->next);
3207 		}
3208 
3209 		if (n)
3210 			break;
3211 	}
3212 	state->bucket = bucket;
3213 
3214 	return n;
3215 }
3216 
3217 static struct neighbour *neigh_get_next(struct seq_file *seq,
3218 					struct neighbour *n,
3219 					loff_t *pos)
3220 {
3221 	struct neigh_seq_state *state = seq->private;
3222 	struct net *net = seq_file_net(seq);
3223 	struct neigh_hash_table *nht = state->nht;
3224 
3225 	if (state->neigh_sub_iter) {
3226 		void *v = state->neigh_sub_iter(state, n, pos);
3227 		if (v)
3228 			return n;
3229 	}
3230 	n = rcu_dereference(n->next);
3231 
3232 	while (1) {
3233 		while (n) {
3234 			if (!net_eq(dev_net(n->dev), net))
3235 				goto next;
3236 			if (state->neigh_sub_iter) {
3237 				void *v = state->neigh_sub_iter(state, n, pos);
3238 				if (v)
3239 					return n;
3240 				goto next;
3241 			}
3242 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3243 				break;
3244 
3245 			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3246 				break;
3247 next:
3248 			n = rcu_dereference(n->next);
3249 		}
3250 
3251 		if (n)
3252 			break;
3253 
3254 		if (++state->bucket >= (1 << nht->hash_shift))
3255 			break;
3256 
3257 		n = rcu_dereference(nht->hash_buckets[state->bucket]);
3258 	}
3259 
3260 	if (n && pos)
3261 		--(*pos);
3262 	return n;
3263 }
3264 
3265 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3266 {
3267 	struct neighbour *n = neigh_get_first(seq);
3268 
3269 	if (n) {
3270 		--(*pos);
3271 		while (*pos) {
3272 			n = neigh_get_next(seq, n, pos);
3273 			if (!n)
3274 				break;
3275 		}
3276 	}
3277 	return *pos ? NULL : n;
3278 }
3279 
3280 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3281 {
3282 	struct neigh_seq_state *state = seq->private;
3283 	struct net *net = seq_file_net(seq);
3284 	struct neigh_table *tbl = state->tbl;
3285 	struct pneigh_entry *pn = NULL;
3286 	int bucket;
3287 
3288 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
3289 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3290 		pn = tbl->phash_buckets[bucket];
3291 		while (pn && !net_eq(pneigh_net(pn), net))
3292 			pn = pn->next;
3293 		if (pn)
3294 			break;
3295 	}
3296 	state->bucket = bucket;
3297 
3298 	return pn;
3299 }
3300 
3301 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3302 					    struct pneigh_entry *pn,
3303 					    loff_t *pos)
3304 {
3305 	struct neigh_seq_state *state = seq->private;
3306 	struct net *net = seq_file_net(seq);
3307 	struct neigh_table *tbl = state->tbl;
3308 
3309 	do {
3310 		pn = pn->next;
3311 	} while (pn && !net_eq(pneigh_net(pn), net));
3312 
3313 	while (!pn) {
3314 		if (++state->bucket > PNEIGH_HASHMASK)
3315 			break;
3316 		pn = tbl->phash_buckets[state->bucket];
3317 		while (pn && !net_eq(pneigh_net(pn), net))
3318 			pn = pn->next;
3319 		if (pn)
3320 			break;
3321 	}
3322 
3323 	if (pn && pos)
3324 		--(*pos);
3325 
3326 	return pn;
3327 }
3328 
3329 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3330 {
3331 	struct pneigh_entry *pn = pneigh_get_first(seq);
3332 
3333 	if (pn) {
3334 		--(*pos);
3335 		while (*pos) {
3336 			pn = pneigh_get_next(seq, pn, pos);
3337 			if (!pn)
3338 				break;
3339 		}
3340 	}
3341 	return *pos ? NULL : pn;
3342 }
3343 
3344 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3345 {
3346 	struct neigh_seq_state *state = seq->private;
3347 	void *rc;
3348 	loff_t idxpos = *pos;
3349 
3350 	rc = neigh_get_idx(seq, &idxpos);
3351 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3352 		rc = pneigh_get_idx(seq, &idxpos);
3353 
3354 	return rc;
3355 }
3356 
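/* Common seq_file iterator over a neighbour table: walks the hash
 * buckets under RCU and tbl->lock and, unless NEIGH_SEQ_NEIGH_ONLY is
 * set, continues into the proxy hash afterwards.
 */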
3357 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3358 	__acquires(tbl->lock)
3359 	__acquires(rcu)
3360 {
3361 	struct neigh_seq_state *state = seq->private;
3362 
3363 	state->tbl = tbl;
3364 	state->bucket = 0;
3365 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3366 
3367 	rcu_read_lock();
3368 	state->nht = rcu_dereference(tbl->nht);
3369 	read_lock_bh(&tbl->lock);
3370 
3371 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3372 }
3373 EXPORT_SYMBOL(neigh_seq_start);
3374 
3375 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3376 {
3377 	struct neigh_seq_state *state;
3378 	void *rc;
3379 
3380 	if (v == SEQ_START_TOKEN) {
3381 		rc = neigh_get_first(seq);
3382 		goto out;
3383 	}
3384 
3385 	state = seq->private;
3386 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3387 		rc = neigh_get_next(seq, v, NULL);
3388 		if (rc)
3389 			goto out;
3390 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3391 			rc = pneigh_get_first(seq);
3392 	} else {
3393 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3394 		rc = pneigh_get_next(seq, v, NULL);
3395 	}
3396 out:
3397 	++(*pos);
3398 	return rc;
3399 }
3400 EXPORT_SYMBOL(neigh_seq_next);
3401 
3402 void neigh_seq_stop(struct seq_file *seq, void *v)
3403 	__releases(tbl->lock)
3404 	__releases(rcu)
3405 {
3406 	struct neigh_seq_state *state = seq->private;
3407 	struct neigh_table *tbl = state->tbl;
3408 
3409 	read_unlock_bh(&tbl->lock);
3410 	rcu_read_unlock();
3411 }
3412 EXPORT_SYMBOL(neigh_seq_stop);
3413 
3414 /* statistics via seq_file */
3415 
3416 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3417 {
3418 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3419 	int cpu;
3420 
3421 	if (*pos == 0)
3422 		return SEQ_START_TOKEN;
3423 
3424 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3425 		if (!cpu_possible(cpu))
3426 			continue;
3427 		*pos = cpu+1;
3428 		return per_cpu_ptr(tbl->stats, cpu);
3429 	}
3430 	return NULL;
3431 }
3432 
3433 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3434 {
3435 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3436 	int cpu;
3437 
3438 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3439 		if (!cpu_possible(cpu))
3440 			continue;
3441 		*pos = cpu+1;
3442 		return per_cpu_ptr(tbl->stats, cpu);
3443 	}
3444 	(*pos)++;
3445 	return NULL;
3446 }
3447 
3448 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3449 {
3450 }
3452 
3453 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3454 {
3455 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3456 	struct neigh_statistics *st = v;
3457 
3458 	if (v == SEQ_START_TOKEN) {
3459 		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3460 		return 0;
3461 	}
3462 
3463 	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
3464 			"%08lx         %08lx         %08lx         "
3465 			"%08lx       %08lx            %08lx\n",
3466 		   atomic_read(&tbl->entries),
3467 
3468 		   st->allocs,
3469 		   st->destroys,
3470 		   st->hash_grows,
3471 
3472 		   st->lookups,
3473 		   st->hits,
3474 
3475 		   st->res_failed,
3476 
3477 		   st->rcv_probes_mcast,
3478 		   st->rcv_probes_ucast,
3479 
3480 		   st->periodic_gc_runs,
3481 		   st->forced_gc_runs,
3482 		   st->unres_discards,
3483 		   st->table_fulls
3484 		   );
3485 
3486 	return 0;
3487 }
3488 
3489 static const struct seq_operations neigh_stat_seq_ops = {
3490 	.start	= neigh_stat_seq_start,
3491 	.next	= neigh_stat_seq_next,
3492 	.stop	= neigh_stat_seq_stop,
3493 	.show	= neigh_stat_seq_show,
3494 };
3495 #endif /* CONFIG_PROC_FS */
3496 
3497 static void __neigh_notify(struct neighbour *n, int type, int flags,
3498 			   u32 pid)
3499 {
3500 	struct net *net = dev_net(n->dev);
3501 	struct sk_buff *skb;
3502 	int err = -ENOBUFS;
3503 
3504 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3505 	if (skb == NULL)
3506 		goto errout;
3507 
3508 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
3509 	if (err < 0) {
3510 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3511 		WARN_ON(err == -EMSGSIZE);
3512 		kfree_skb(skb);
3513 		goto errout;
3514 	}
3515 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3516 	return;
3517 errout:
3518 	if (err < 0)
3519 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3520 }
3521 
3522 void neigh_app_ns(struct neighbour *n)
3523 {
3524 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3525 }
3526 EXPORT_SYMBOL(neigh_app_ns);
3527 
3528 #ifdef CONFIG_SYSCTL
3529 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3530 
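/* The legacy unres_qlen sysctl is expressed in packets, but the
 * underlying QUEUE_LEN_BYTES variable is in bytes; convert back and
 * forth using the truesize of an ethernet frame as the per-packet
 * estimate.
 */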
3531 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3532 			   void *buffer, size_t *lenp, loff_t *ppos)
3533 {
3534 	int size, ret;
3535 	struct ctl_table tmp = *ctl;
3536 
3537 	tmp.extra1 = SYSCTL_ZERO;
3538 	tmp.extra2 = &unres_qlen_max;
3539 	tmp.data = &size;
3540 
3541 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3542 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3543 
3544 	if (write && !ret)
3545 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3546 	return ret;
3547 }
3548 
3549 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3550 				  int index)
3551 {
3552 	struct net_device *dev;
3553 	int family = neigh_parms_family(p);
3554 
3555 	rcu_read_lock();
3556 	for_each_netdev_rcu(net, dev) {
3557 		struct neigh_parms *dst_p =
3558 				neigh_get_dev_parms_rcu(dev, family);
3559 
3560 		if (dst_p && !test_bit(index, dst_p->data_state))
3561 			dst_p->data[index] = p->data[index];
3562 	}
3563 	rcu_read_unlock();
3564 }
3565 
3566 static void neigh_proc_update(struct ctl_table *ctl, int write)
3567 {
3568 	struct net_device *dev = ctl->extra1;
3569 	struct neigh_parms *p = ctl->extra2;
3570 	struct net *net = neigh_parms_net(p);
3571 	int index = (int *) ctl->data - p->data;
3572 
3573 	if (!write)
3574 		return;
3575 
3576 	set_bit(index, p->data_state);
3577 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3578 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3579 	if (!dev) /* NULL dev means this is default value */
3580 		neigh_copy_dflt_parms(net, p, index);
3581 }
3582 
3583 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3584 					   void *buffer, size_t *lenp,
3585 					   loff_t *ppos)
3586 {
3587 	struct ctl_table tmp = *ctl;
3588 	int ret;
3589 
3590 	tmp.extra1 = SYSCTL_ZERO;
3591 	tmp.extra2 = SYSCTL_INT_MAX;
3592 
3593 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3594 	neigh_proc_update(ctl, write);
3595 	return ret;
3596 }
3597 
3598 static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
3599 						   void *buffer, size_t *lenp, loff_t *ppos)
3600 {
3601 	struct ctl_table tmp = *ctl;
3602 	int ret;
3603 
3604 	int min = msecs_to_jiffies(1);
3605 
3606 	tmp.extra1 = &min;
3607 	tmp.extra2 = NULL;
3608 
3609 	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3610 	neigh_proc_update(ctl, write);
3611 	return ret;
3612 }
3613 
3614 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3615 			size_t *lenp, loff_t *ppos)
3616 {
3617 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3618 
3619 	neigh_proc_update(ctl, write);
3620 	return ret;
3621 }
3622 EXPORT_SYMBOL(neigh_proc_dointvec);
3623 
3624 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3625 				size_t *lenp, loff_t *ppos)
3626 {
3627 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3628 
3629 	neigh_proc_update(ctl, write);
3630 	return ret;
3631 }
3632 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3633 
3634 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3635 					      void *buffer, size_t *lenp,
3636 					      loff_t *ppos)
3637 {
3638 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3639 
3640 	neigh_proc_update(ctl, write);
3641 	return ret;
3642 }
3643 
3644 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3645 				   void *buffer, size_t *lenp, loff_t *ppos)
3646 {
3647 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3648 
3649 	neigh_proc_update(ctl, write);
3650 	return ret;
3651 }
3652 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3653 
3654 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3655 					  void *buffer, size_t *lenp,
3656 					  loff_t *ppos)
3657 {
3658 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3659 
3660 	neigh_proc_update(ctl, write);
3661 	return ret;
3662 }
3663 
3664 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3665 					  void *buffer, size_t *lenp,
3666 					  loff_t *ppos)
3667 {
3668 	struct neigh_parms *p = ctl->extra2;
3669 	int ret;
3670 
3671 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3672 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3673 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3674 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3675 	else
3676 		ret = -1;
3677 
3678 	if (write && ret == 0) {
3679 		/* Update reachable_time as well; otherwise the change only
3680 		 * takes effect the next time neigh_periodic_work decides to
3681 		 * recompute it.
3682 		 */
3683 		p->reachable_time =
3684 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3685 	}
3686 	return ret;
3687 }
3688 
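/* Evaluates to the offset of data[index] within struct neigh_parms,
 * expressed as a pointer.  neigh_sysctl_register() later rebases it by
 * adding the address of the actual neigh_parms instance.
 */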
3689 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3690 	(&((struct neigh_parms *) 0)->data[index])
3691 
3692 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3693 	[NEIGH_VAR_ ## attr] = { \
3694 		.procname	= name, \
3695 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3696 		.maxlen		= sizeof(int), \
3697 		.mode		= mval, \
3698 		.proc_handler	= proc, \
3699 	}
3700 
3701 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3702 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3703 
3704 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3705 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3706 
3707 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3708 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3709 
3710 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3711 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3712 
3713 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3714 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3715 
3716 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3717 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3718 
3719 static struct neigh_sysctl_table {
3720 	struct ctl_table_header *sysctl_header;
3721 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3722 } neigh_sysctl_template __read_mostly = {
3723 	.neigh_vars = {
3724 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3725 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3726 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3727 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3728 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3729 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3730 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3731 		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3732 						       "interval_probe_time_ms"),
3733 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3734 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3735 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3736 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3737 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3738 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3739 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3740 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3741 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3742 		[NEIGH_VAR_GC_INTERVAL] = {
3743 			.procname	= "gc_interval",
3744 			.maxlen		= sizeof(int),
3745 			.mode		= 0644,
3746 			.proc_handler	= proc_dointvec_jiffies,
3747 		},
3748 		[NEIGH_VAR_GC_THRESH1] = {
3749 			.procname	= "gc_thresh1",
3750 			.maxlen		= sizeof(int),
3751 			.mode		= 0644,
3752 			.extra1		= SYSCTL_ZERO,
3753 			.extra2		= SYSCTL_INT_MAX,
3754 			.proc_handler	= proc_dointvec_minmax,
3755 		},
3756 		[NEIGH_VAR_GC_THRESH2] = {
3757 			.procname	= "gc_thresh2",
3758 			.maxlen		= sizeof(int),
3759 			.mode		= 0644,
3760 			.extra1		= SYSCTL_ZERO,
3761 			.extra2		= SYSCTL_INT_MAX,
3762 			.proc_handler	= proc_dointvec_minmax,
3763 		},
3764 		[NEIGH_VAR_GC_THRESH3] = {
3765 			.procname	= "gc_thresh3",
3766 			.maxlen		= sizeof(int),
3767 			.mode		= 0644,
3768 			.extra1		= SYSCTL_ZERO,
3769 			.extra2		= SYSCTL_INT_MAX,
3770 			.proc_handler	= proc_dointvec_minmax,
3771 		},
3772 		{},
3773 	},
3774 };
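/*
 * Once registered by neigh_sysctl_register() below, these entries
 * surface under a per-protocol, per-device (or "default") directory,
 * e.g.:
 *
 *	/proc/sys/net/ipv4/neigh/default/gc_thresh1
 *	/proc/sys/net/ipv6/neigh/eth0/mcast_solicit
 *
 * ("eth0" stands in for any interface name.)
 */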
3775 
3776 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3777 			  proc_handler *handler)
3778 {
3779 	int i;
3780 	struct neigh_sysctl_table *t;
3781 	const char *dev_name_source;
3782 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
3783 	char *p_name;
3784 	size_t neigh_vars_size;
3785 
3786 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3787 	if (!t)
3788 		goto err;
3789 
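	/*
	 * Relocate the per-parms entries: the template's .data holds a
	 * field offset (see NEIGH_PARMS_DATA_OFFSET()), so adding the
	 * parms address turns it into &p->data[i].  extra1/extra2 hand
	 * the device and parms to the neigh_proc_*() handlers.
	 */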
3790 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3791 		t->neigh_vars[i].data += (long) p;
3792 		t->neigh_vars[i].extra1 = dev;
3793 		t->neigh_vars[i].extra2 = p;
3794 	}
3795 
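	/*
	 * The gc_* knobs are per-table, not per-device: a per-device
	 * table is terminated just before them, while the "default"
	 * table wires them directly at the neigh_table fields.
	 */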
3796 	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3797 	if (dev) {
3798 		dev_name_source = dev->name;
3799 		/* Terminate the table early */
3800 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3801 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3802 		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3803 	} else {
3804 		struct neigh_table *tbl = p->tbl;
3805 		dev_name_source = "default";
3806 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3807 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3808 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3809 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3810 	}
3811 
3812 	if (handler) {
3813 		/* RetransTime */
3814 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3815 		/* ReachableTime */
3816 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3817 		/* RetransTime (in milliseconds) */
3818 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3819 		/* ReachableTime (in milliseconds) */
3820 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3821 	} else {
3822 		/* These handlers update p->reachable_time after
3823 		 * base_reachable_time(_ms) is set, so the new interval takes
3824 		 * effect on the next neighbour update instead of waiting for
3825 		 * neigh_periodic_work to refresh it (which can take several
3826 		 * minutes).  Any handler that replaces them should do the same.
3827 		 */
3828 		/* ReachableTime */
3829 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3830 			neigh_proc_base_reachable_time;
3831 		/* ReachableTime (in milliseconds) */
3832 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3833 			neigh_proc_base_reachable_time;
3834 	}
3835 
3836 	switch (neigh_parms_family(p)) {
3837 	case AF_INET:
3838 		p_name = "ipv4";
3839 		break;
3840 	case AF_INET6:
3841 		p_name = "ipv6";
3842 		break;
3843 	default:
3844 		BUG();
3845 	}
3846 
3847 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3848 		 p_name, dev_name_source);
3849 	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3850 						  neigh_path, t->neigh_vars,
3851 						  neigh_vars_size);
3852 	if (!t->sysctl_header)
3853 		goto free;
3854 
3855 	p->sysctl_table = t;
3856 	return 0;
3857 
3858 free:
3859 	kfree(t);
3860 err:
3861 	return -ENOBUFS;
3862 }
3863 EXPORT_SYMBOL(neigh_sysctl_register);
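/*
 * A minimal caller sketch ("my_parms" is hypothetical; real callers
 * pass their protocol's neigh_parms and either a custom handler or
 * NULL):
 *
 *	if (neigh_sysctl_register(dev, my_parms, NULL))
 *		return -ENOBUFS;	// no sysctl directory for this device
 *
 * Passing dev == NULL registers the "default" directory, including the
 * table-wide gc_* entries.
 */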
3864 
3865 void neigh_sysctl_unregister(struct neigh_parms *p)
3866 {
3867 	if (p->sysctl_table) {
3868 		struct neigh_sysctl_table *t = p->sysctl_table;
3869 		p->sysctl_table = NULL;
3870 		unregister_net_sysctl_table(t->sysctl_header);
3871 		kfree(t);
3872 	}
3873 }
3874 EXPORT_SYMBOL(neigh_sysctl_unregister);
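/*
 * Counterpart to neigh_sysctl_register(); protocols typically call it
 * from their device teardown paths.  Calling it when registration
 * never happened is safe, since p->sysctl_table is then NULL.
 */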
3875 
3876 #endif	/* CONFIG_SYSCTL */
3877 
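/*
 * These rtnl_register() calls wire up the neighbour netlink API:
 * e.g. "ip neigh add ..." arrives as RTM_NEWNEIGH and lands in
 * neigh_add(), while "ip neigh show" issues an RTM_GETNEIGH dump
 * served by neigh_dump_info().
 */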
3878 static int __init neigh_init(void)
3879 {
3880 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3881 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3882 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3883 
3884 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3885 		      0);
3886 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3887 
3888 	return 0;
3889 }
3890 
3891 subsys_initcall(neigh_init);
3892