// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     Doing so will result in deadlocks if the backend/driver wants to
     use the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
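
/*
 * Illustrative sketch (hypothetical, not part of the original file):
 * the canonical way to do non-trivial work on an entry found during a
 * bucket scan is to take a reference under tbl->lock, drop the lock,
 * and only then act on the entry, exactly as the rules above require.
 */
#if 0
static void example_process_entry(struct neigh_table *tbl,
				  struct neighbour *n)
{
	write_lock_bh(&tbl->lock);
	neigh_hold(n);			/* pin the entry */
	write_unlock_bh(&tbl->lock);	/* never call out with tbl->lock held */

	/* ... callbacks into protocol backends are safe here ... */

	neigh_release(n);		/* drop our pin */
}
#endif
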
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
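
/*
 * Illustrative sketch (hypothetical, not built): with base = 30 * HZ
 * (the ARP default for base_reachable_time), the result above is
 * uniform over [15 * HZ, 45 * HZ), i.e. 15..45 seconds.
 */
#if 0
static bool example_reach_time_in_bounds(unsigned long base)
{
	unsigned long t = neigh_rand_reach_time(base);

	/* uniform over [base/2, base/2 + base) == [(1/2)base, (3/2)base) */
	return t >= (base >> 1) && t < (base >> 1) + base;
}
#endif
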
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}
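
/*
 * Illustrative sketch (hypothetical, not built): an armed timer owns one
 * reference on the entry, which is why neigh_add_timer() takes a hold and
 * neigh_del_timer() or the expired handler drops it. A rearm must not leak
 * the extra hold when the timer was already pending.
 */
#if 0
static void example_safe_rearm(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);			/* reference for the timer */
	if (mod_timer(&n->timer, when))	/* returns 1 if it was pending */
		neigh_release(n);	/* the old timer already held one */
}
#endif
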
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
					lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation:
				   we must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
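
/*
 * Illustrative sketch (hypothetical, not built): the threshold logic
 * above in predicate form. gc_thresh3 is a hard cap (the allocation
 * fails unless forced gc frees room), while gc_thresh2 only triggers a
 * synchronous gc when the last flush is more than five seconds old.
 */
#if 0
static bool example_alloc_wants_forced_gc(struct neigh_table *tbl,
					  int entries)
{
	return entries >= tbl->gc_thresh3 ||
	       (entries >= tbl->gc_thresh2 &&
		time_after(jiffies, tbl->last_flush + 5 * HZ));
}
#endif
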
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
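
/*
 * Illustrative caller sketch (hypothetical, not built): neigh_lookup()
 * returns a referenced entry or NULL, so every successful lookup must
 * be balanced with neigh_release().
 */
#if 0
static void example_peek_state(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n = neigh_lookup(tbl, pkey, dev);

	if (!n)
		return;
	read_lock_bh(&n->lock);
	neigh_dbg(2, "neigh %p state %x\n", n, n->nud_state);
	read_unlock_bh(&n->lock);
	neigh_release(n);
}
#endif
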
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
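
/*
 * Illustrative sketch (hypothetical, not built): pneigh_hash() folds
 * only the last four key bytes into one of 16 buckets. For an IPv4 key
 * (key_len == 4) that is the whole address.
 */
#if 0
static u32 example_pneigh_bucket_v4(__be32 addr)
{
	u32 h = (__force u32)addr;

	h ^= h >> 16;
	h ^= h >> 8;
	h ^= h >> 4;
	return h & PNEIGH_HASHMASK;	/* 0..15 */
}
#endif
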
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	n->protocol = 0;
	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
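
/*
 * Illustrative sketch (hypothetical, not built): the function above
 * detaches matching entries onto a private freelist while holding
 * tbl->lock, then drops the lock before destroying them, so the
 * pdestructor() callback never runs under the table lock.
 */
#if 0
static void example_unlink_then_free(struct neigh_table *tbl)
{
	struct pneigh_entry *n, *freelist = NULL;

	write_lock_bh(&tbl->lock);
	/* ... move doomed entries from the buckets onto freelist ... */
	write_unlock_bh(&tbl->lock);

	while ((n = freelist)) {	/* lock-free teardown */
		freelist = n->next;
		kfree(n);
	}
}
#endif
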
static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
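
/*
 * Illustrative numbers (assumed typical defaults, hypothetical sketch):
 * with ucast_probes = 3, app_probes = 0 and mcast_probes = 3, an
 * INCOMPLETE entry gives up after 3 + 0 + 3 = 6 solicitations, while a
 * PROBE entry counts mcast_reprobes instead of mcast_probes.
 */
#if 0
static int example_max_probes_incomplete(struct neigh_parms *p)
{
	/* state without NUD_PROBE set, e.g. NUD_INCOMPLETE */
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       NEIGH_VAR(p, MCAST_PROBES);
}
#endif
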
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very thin place. report_unreachable() is a very
	   complicated routine. In particular, it can hit the same
	   neighbour entry!

	   So we try to be accurate and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
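
/*
 * Summary sketch of the timer-driven NUD transitions implemented above
 * (illustrative, not exhaustive):
 *
 *	REACHABLE -(reachable_time expired)------------> STALE
 *	REACHABLE -(expired, but used recently)--------> DELAY
 *	DELAY     -(confirmed within delay_probe_time)-> REACHABLE
 *	DELAY     -(not confirmed in time)-------------> PROBE
 *	PROBE/INCOMPLETE -(neigh_max_probes() reached)-> FAILED
 */
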
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
		if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
		lladdr instead of overriding it
		if it is different.
	NEIGH_UPDATE_F_ADMIN means that the change is administrative.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
		NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
		a router.

   Caller MUST hold a reference count on the entry.
 */
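
/*
 * Illustrative caller sketch (hypothetical, not built): an
 * administrative update that pins a new link-layer address.
 * NEIGH_UPDATE_F_OVERRIDE forces the lladdr change and
 * NEIGH_UPDATE_F_ADMIN lifts the NUD_NOARP/NUD_PERMANENT restriction.
 */
#if 0
static int example_pin_lladdr(struct neighbour *n, const u8 *mac)
{
	return neigh_update(n, mac, NUD_PERMANENT,
			    NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
}
#endif
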
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one. */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * received an ARP packet, even if it doesn't change the IP to MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid a dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);

	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
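
/*
 * Illustrative sketch (hypothetical, not built): hh->hh_len doubles as
 * the "already initialized" flag. Fast paths test it locklessly with
 * READ_ONCE(), and racing initializers serialize on n->lock, so losers
 * of the race find hh_len already set and do nothing.
 */
#if 0
static void example_hh_once(struct neighbour *n)
{
	if (READ_ONCE(n->hh.hh_len))	/* lockless fast path */
		return;
	neigh_hh_init(n);		/* double-checked under n->lock */
}
#endif
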
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = jiffies +
			prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
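
/*
 * Illustrative sketch (hypothetical, not built): proxied requests are
 * answered after a uniform random delay in [0, proxy_delay) jiffies,
 * so several proxies on one segment do not reply in lock step.
 */
#if 0
static unsigned long example_proxy_deadline(struct neigh_parms *p)
{
	/* e.g. with the assumed ARP default proxy_delay of 0.8 s, the
	 * reply is scheduled up to ~0.8 s from now.
	 */
	return jiffies + prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
}
#endif
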
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
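
/*
 * Illustrative sketch (hypothetical, not built): release does not free
 * the parms directly. It unlinks them, waits one RCU grace period (so
 * lockless readers holding a parms pointer are done), and only then
 * drops the refcount; the object is freed once the last clone held by
 * a neighbour entry is gone as well.
 */
#if 0
static void example_parms_lifetime(struct neigh_table *tbl,
				   struct net_device *dev)
{
	struct neigh_parms *p = neigh_parms_alloc(dev, tbl);

	if (!p)
		return;
	/* ... entries created on dev now clone p via neigh_parms_clone() ... */
	neigh_parms_release(tbl, p);	/* unlink + RCU-deferred put */
}
#endif
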
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
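
/*
 * Illustrative layout sketch (hypothetical, not built): the primary key
 * is embedded at the end of struct neighbour and device private data
 * follows at the next NEIGH_PRIV_ALIGN boundary, which is why
 * neigh_alloc() asks for entry_size + dev->neigh_priv_len bytes.
 */
#if 0
static size_t example_entry_size(unsigned int key_len)
{
	/* e.g. for ARP, key_len == 4 (an IPv4 address) */
	return ALIGN(offsetof(struct neighbour, primary_key) + key_len,
		     NEIGH_PRIV_ALIGN);
}
#endif
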
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	neigh_release(neigh);

out:
	return err;
}
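
/*
 * Illustrative mapping (sketch, assuming standard iproute2 behaviour):
 * "ip neigh add" sends RTM_NEWNEIGH with NLM_F_CREATE | NLM_F_EXCL,
 * while "ip neigh replace" sends NLM_F_CREATE | NLM_F_REPLACE. Without
 * NLM_F_REPLACE the code above drops the OVERRIDE flags, so an existing
 * entry's lladdr is not forcibly rewritten:
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 \
 *		dev eth0 nud permanent
 */
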
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
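
/*
 * Illustrative arithmetic (sketch; the exact truesize is configuration
 * dependent): the deprecated NDTPA_QUEUE_LEN attribute is reported in
 * packets by dividing the byte budget by the truesize of one full
 * Ethernet frame, roughly 2 KiB, so a 64 KiB QUEUE_LEN_BYTES shows up
 * as about 32 packets. neightbl_set() applies the inverse conversion.
 */
#if 0
static u32 example_queue_len_pkts(struct neigh_parms *parms)
{
	return NEIGH_VAR(parms, QUEUE_LEN_BYTES) /
	       SKB_TRUESIZE(ETH_FRAME_LEN);
}
#endif
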
			   tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME] = { .type = NLA_STRING },
	[NDTA_THRESH1] = { .type = NLA_U32 },
	[NDTA_THRESH2] = { .type = NLA_U32 },
	[NDTA_THRESH3] = { .type = NLA_U32 },
	[NDTA_GC_INTERVAL] = { .type = NLA_U64 },
	[NDTA_PARMS] = { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX] = { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
	[NDTPA_APP_PROBES] = { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
	[NDTPA_GC_STALETIME] = { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
	[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};

static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
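	 *
	 * For example (assuming an iproute2 userspace), a command such as
	 *
	 *	ip ntable change name arp_cache thresh2 2048
	 *
	 * reaches this function as an RTM_SETNEIGHTBL message and is
	 * applied under this lock.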
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well; otherwise
				 * the change only takes effect the next time
				 * neigh_periodic_work recomputes it (which
				 * can be multiple minutes away)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}

static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
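	/* Under strict checking, a valid dump request carries a bare
	 * struct ndtmsg and nothing else; a minimal sketch of a conforming
	 * header (hypothetical userspace code):
	 *
	 *	struct ndtmsg ndtm = {
	 *		.ndtm_family = AF_INET,	// or 0 for all families
	 *	};
	 *	// pads left zeroed, no attributes appended
	 */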
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = neigh->ops->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = neigh->flags;
	ndm->ndm_type = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = tbl->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = pn->flags | NTF_PROXY;
	ndm->ndm_type = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}

static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);
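	/* Unlike neigh_dump_table() above, proxy entries are not RCU
	 * protected, so the whole walk stays under the table read lock.
	 */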

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH, flags, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int err, i;

	if (strict_check) {
		struct ndmsg *ndm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	} else {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence; the family member sits
	 * at the same offset in both rtgenmsg and ndmsg
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

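	/* Dumps are delivered to userspace in multiple recvmsg() chunks;
	 * the position is carried across calls in cb->args: args[0] is the
	 * table index (saved below), args[1]/args[2] hold the bucket and
	 * entry offset for neigh_dump_table(), and args[3]/args[4] the same
	 * for pneigh_dump_table().
	 */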
	cb->args[0] = t;
	return skb->len;
}

static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
					    NDA_MAX, nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -EINVAL;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled.
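 *
 * A minimal sketch of the expected calling pattern (the callback name is
 * hypothetical):
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, my_release_decision);
 *	write_unlock_bh(&tbl->lock);
 *
 * where my_release_decision() returns nonzero for entries that should be
 * unlinked and released.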
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		if (index == NEIGH_ARP_TABLE) {
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	} else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
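				/* The optional sub-iterator expands one
				 * neighbour into several seq records; a NULL
				 * return means this neighbour is exhausted
				 * and the walk moves on.
				 */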
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos,
		      struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);
	read_lock(&tbl->lock);

	return *pos ?
	       neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	(*pos)++;
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start = neigh_stat_seq_start,
	.next = neigh_stat_seq_next,
	.stop = neigh_stat_seq_stop,
	.show = neigh_stat_seq_show,
};
#endif /* CONFIG_PROC_FS */

static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
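		/* neigh_nlmsg_size() is meant to be an upper bound. As a
		 * rough worked example (attribute sizes vary by arch, so
		 * the numbers are illustrative only):
		 *
		 *	NLMSG_ALIGN(sizeof(struct ndmsg))		  12
		 *	+ nla_total_size(MAX_ADDR_LEN)			+ 36
		 *	+ nla_total_size(MAX_ADDR_LEN)			+ 36
		 *	+ nla_total_size(sizeof(struct nda_cacheinfo))	+ 20
		 *	+ nla_total_size(4) + nla_total_size(1)		+ 16
		 *
		 * Hitting -EMSGSIZE here therefore means an attribute was
		 * added to neigh_fill_info() without growing the estimate.
		 */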
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = SYSCTL_INT_MAX;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void *buffer, size_t *lenp,
					      loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

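	/* As in the other wrappers, record the write in p->data_state and,
	 * for a default table entry (NULL dev), propagate the new value to
	 * every device that has not overridden it; see neigh_proc_update().
	 */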
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well; otherwise the change only
		 * takes effect the next time neigh_periodic_work recomputes it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

#define NEIGH_PARMS_DATA_OFFSET(index) \
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname = name, \
		.data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen = sizeof(int), \
		.mode = mval, \
		.proc_handler = proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
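		/* Each entry macro expands to a designated initializer; e.g.
		 * NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen")
		 * above becomes, roughly:
		 *
		 *	[NEIGH_VAR_PROXY_QLEN] = {
		 *		.procname = "proxy_qlen",
		 *		.data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_PROXY_QLEN),
		 *		.maxlen = sizeof(int),
		 *		.mode = 0644,
		 *		.proc_handler = neigh_proc_dointvec_zero_intmax,
		 *	},
		 *
		 * with .data rebased onto a real neigh_parms in
		 * neigh_sysctl_register() below.
		 */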
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname = "gc_interval",
			.maxlen = sizeof(int),
			.mode = 0644,
			.proc_handler = proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname = "gc_thresh1",
			.maxlen = sizeof(int),
			.mode = 0644,
			.extra1 = SYSCTL_ZERO,
			.extra2 = SYSCTL_INT_MAX,
			.proc_handler = proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname = "gc_thresh2",
			.maxlen = sizeof(int),
			.mode = 0644,
			.extra1 = SYSCTL_ZERO,
			.extra2 = SYSCTL_INT_MAX,
			.proc_handler = proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname = "gc_thresh3",
			.maxlen = sizeof(int),
			.mode = 0644,
			.extra1 = SYSCTL_ZERO,
			.extra2 = SYSCTL_INT_MAX,
			.proc_handler = proc_dointvec_minmax,
		},
		{},
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;

		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* These handlers update p->reachable_time after
		 * base_reachable_time(_ms) is set, so that the new interval
		 * takes effect on the next neighbour update instead of
		 * waiting for neigh_periodic_work to recompute it (which can
		 * take multiple minutes). Any handler that replaces them
		 * should do the same.
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

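	/* A protocol that needs extra work on writes passes its own handler;
	 * a plausible sketch of such a caller (names hypothetical):
	 *
	 *	err = neigh_sysctl_register(dev, nd_parms,
	 *				    &my_proto_sysctl_change);
	 *
	 * while callers content with the defaults pass NULL and get the
	 * base_reachable_time handlers installed above.
	 */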
	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;

		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);
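
/* Usage example (assuming an iproute2 userspace): the handlers registered
 * above service commands such as
 *
 *	ip neigh add 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0 nud permanent
 *	ip neigh get 192.0.2.1 dev eth0
 *	ip neigh del 192.0.2.1 dev eth0
 *
 * which map to RTM_NEWNEIGH, RTM_GETNEIGH and RTM_DELNEIGH respectively.
 */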