1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Generic address resolution entity 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 8 * 9 * Fixes: 10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. 11 * Harald Welte Add neighbour cache statistics like rtstat 12 */ 13 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 16 #include <linux/slab.h> 17 #include <linux/kmemleak.h> 18 #include <linux/types.h> 19 #include <linux/kernel.h> 20 #include <linux/module.h> 21 #include <linux/socket.h> 22 #include <linux/netdevice.h> 23 #include <linux/proc_fs.h> 24 #ifdef CONFIG_SYSCTL 25 #include <linux/sysctl.h> 26 #endif 27 #include <linux/times.h> 28 #include <net/net_namespace.h> 29 #include <net/neighbour.h> 30 #include <net/arp.h> 31 #include <net/dst.h> 32 #include <net/sock.h> 33 #include <net/netevent.h> 34 #include <net/netlink.h> 35 #include <linux/rtnetlink.h> 36 #include <linux/random.h> 37 #include <linux/string.h> 38 #include <linux/log2.h> 39 #include <linux/inetdevice.h> 40 #include <net/addrconf.h> 41 42 #include <trace/events/neigh.h> 43 44 #define DEBUG 45 #define NEIGH_DEBUG 1 46 #define neigh_dbg(level, fmt, ...) \ 47 do { \ 48 if (level <= NEIGH_DEBUG) \ 49 pr_debug(fmt, ##__VA_ARGS__); \ 50 } while (0) 51 52 #define PNEIGH_HASHMASK 0xF 53 54 static void neigh_timer_handler(struct timer_list *t); 55 static void __neigh_notify(struct neighbour *n, int type, int flags, 56 u32 pid); 57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); 58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, 59 struct net_device *dev); 60 61 #ifdef CONFIG_PROC_FS 62 static const struct seq_operations neigh_stat_seq_ops; 63 #endif 64 65 /* 66 Neighbour hash table buckets are protected with rwlock tbl->lock. 67 68 - All the scans/updates to hash buckets MUST be made under this lock. 
69 - NOTHING clever should be made under this lock: no callbacks 70 to protocol backends, no attempts to send something to network. 71 It will result in deadlocks, if backend/driver wants to use neighbour 72 cache. 73 - If the entry requires some non-trivial actions, increase 74 its reference count and release table lock. 75 76 Neighbour entries are protected: 77 - with reference count. 78 - with rwlock neigh->lock 79 80 Reference count prevents destruction. 81 82 neigh->lock mainly serializes ll address data and its validity state. 83 However, the same lock is used to protect another entry fields: 84 - timer 85 - resolution queue 86 87 Again, nothing clever shall be made under neigh->lock, 88 the most complicated procedure, which we allow is dev->hard_header. 89 It is supposed, that dev->hard_header is simplistic and does 90 not make callbacks to neighbour tables. 91 */ 92 93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) 94 { 95 kfree_skb(skb); 96 return -ENETDOWN; 97 } 98 99 static void neigh_cleanup_and_release(struct neighbour *neigh) 100 { 101 if (neigh->parms->neigh_cleanup) 102 neigh->parms->neigh_cleanup(neigh); 103 104 trace_neigh_cleanup_and_release(neigh, 0); 105 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); 106 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 107 neigh_release(neigh); 108 } 109 110 /* 111 * It is random distribution in the interval (1/2)*base...(3/2)*base. 112 * It corresponds to default IPv6 settings and is not overridable, 113 * because it is really reasonable choice. 114 */ 115 116 unsigned long neigh_rand_reach_time(unsigned long base) 117 { 118 return base ? 
(prandom_u32() % base) + (base >> 1) : 0; 119 } 120 EXPORT_SYMBOL(neigh_rand_reach_time); 121 122 static void neigh_mark_dead(struct neighbour *n) 123 { 124 n->dead = 1; 125 if (!list_empty(&n->gc_list)) { 126 list_del_init(&n->gc_list); 127 atomic_dec(&n->tbl->gc_entries); 128 } 129 } 130 131 static void neigh_update_gc_list(struct neighbour *n) 132 { 133 bool on_gc_list, exempt_from_gc; 134 135 write_lock_bh(&n->tbl->lock); 136 write_lock(&n->lock); 137 138 /* remove from the gc list if new state is permanent or if neighbor 139 * is externally learned; otherwise entry should be on the gc list 140 */ 141 exempt_from_gc = n->nud_state & NUD_PERMANENT || 142 n->flags & NTF_EXT_LEARNED; 143 on_gc_list = !list_empty(&n->gc_list); 144 145 if (exempt_from_gc && on_gc_list) { 146 list_del_init(&n->gc_list); 147 atomic_dec(&n->tbl->gc_entries); 148 } else if (!exempt_from_gc && !on_gc_list) { 149 /* add entries to the tail; cleaning removes from the front */ 150 list_add_tail(&n->gc_list, &n->tbl->gc_list); 151 atomic_inc(&n->tbl->gc_entries); 152 } 153 154 write_unlock(&n->lock); 155 write_unlock_bh(&n->tbl->lock); 156 } 157 158 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, 159 int *notify) 160 { 161 bool rc = false; 162 u8 ndm_flags; 163 164 if (!(flags & NEIGH_UPDATE_F_ADMIN)) 165 return rc; 166 167 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? 
NTF_EXT_LEARNED : 0; 168 if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { 169 if (ndm_flags & NTF_EXT_LEARNED) 170 neigh->flags |= NTF_EXT_LEARNED; 171 else 172 neigh->flags &= ~NTF_EXT_LEARNED; 173 rc = true; 174 *notify = 1; 175 } 176 177 return rc; 178 } 179 180 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, 181 struct neigh_table *tbl) 182 { 183 bool retval = false; 184 185 write_lock(&n->lock); 186 if (refcount_read(&n->refcnt) == 1) { 187 struct neighbour *neigh; 188 189 neigh = rcu_dereference_protected(n->next, 190 lockdep_is_held(&tbl->lock)); 191 rcu_assign_pointer(*np, neigh); 192 neigh_mark_dead(n); 193 retval = true; 194 } 195 write_unlock(&n->lock); 196 if (retval) 197 neigh_cleanup_and_release(n); 198 return retval; 199 } 200 201 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) 202 { 203 struct neigh_hash_table *nht; 204 void *pkey = ndel->primary_key; 205 u32 hash_val; 206 struct neighbour *n; 207 struct neighbour __rcu **np; 208 209 nht = rcu_dereference_protected(tbl->nht, 210 lockdep_is_held(&tbl->lock)); 211 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); 212 hash_val = hash_val >> (32 - nht->hash_shift); 213 214 np = &nht->hash_buckets[hash_val]; 215 while ((n = rcu_dereference_protected(*np, 216 lockdep_is_held(&tbl->lock)))) { 217 if (n == ndel) 218 return neigh_del(n, np, tbl); 219 np = &n->next; 220 } 221 return false; 222 } 223 224 static int neigh_forced_gc(struct neigh_table *tbl) 225 { 226 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2; 227 unsigned long tref = jiffies - 5 * HZ; 228 struct neighbour *n, *tmp; 229 int shrunk = 0; 230 231 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); 232 233 write_lock_bh(&tbl->lock); 234 235 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) { 236 if (refcount_read(&n->refcnt) == 1) { 237 bool remove = false; 238 239 write_lock(&n->lock); 240 if ((n->nud_state == NUD_FAILED) || 241 time_after(tref, n->updated)) 242 remove = true; 
243 write_unlock(&n->lock); 244 245 if (remove && neigh_remove_one(n, tbl)) 246 shrunk++; 247 if (shrunk >= max_clean) 248 break; 249 } 250 } 251 252 tbl->last_flush = jiffies; 253 254 write_unlock_bh(&tbl->lock); 255 256 return shrunk; 257 } 258 259 static void neigh_add_timer(struct neighbour *n, unsigned long when) 260 { 261 neigh_hold(n); 262 if (unlikely(mod_timer(&n->timer, when))) { 263 printk("NEIGH: BUG, double timer add, state is %x\n", 264 n->nud_state); 265 dump_stack(); 266 } 267 } 268 269 static int neigh_del_timer(struct neighbour *n) 270 { 271 if ((n->nud_state & NUD_IN_TIMER) && 272 del_timer(&n->timer)) { 273 neigh_release(n); 274 return 1; 275 } 276 return 0; 277 } 278 279 static void pneigh_queue_purge(struct sk_buff_head *list) 280 { 281 struct sk_buff *skb; 282 283 while ((skb = skb_dequeue(list)) != NULL) { 284 dev_put(skb->dev); 285 kfree_skb(skb); 286 } 287 } 288 289 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, 290 bool skip_perm) 291 { 292 int i; 293 struct neigh_hash_table *nht; 294 295 nht = rcu_dereference_protected(tbl->nht, 296 lockdep_is_held(&tbl->lock)); 297 298 for (i = 0; i < (1 << nht->hash_shift); i++) { 299 struct neighbour *n; 300 struct neighbour __rcu **np = &nht->hash_buckets[i]; 301 302 while ((n = rcu_dereference_protected(*np, 303 lockdep_is_held(&tbl->lock))) != NULL) { 304 if (dev && n->dev != dev) { 305 np = &n->next; 306 continue; 307 } 308 if (skip_perm && n->nud_state & NUD_PERMANENT) { 309 np = &n->next; 310 continue; 311 } 312 rcu_assign_pointer(*np, 313 rcu_dereference_protected(n->next, 314 lockdep_is_held(&tbl->lock))); 315 write_lock(&n->lock); 316 neigh_del_timer(n); 317 neigh_mark_dead(n); 318 if (refcount_read(&n->refcnt) != 1) { 319 /* The most unpleasant situation. 320 We must destroy neighbour entry, 321 but someone still uses it. 322 323 The destroy will be delayed until 324 the last user releases us, but 325 we must kill timers etc. and move 326 it to safe state. 
327 */ 328 __skb_queue_purge(&n->arp_queue); 329 n->arp_queue_len_bytes = 0; 330 n->output = neigh_blackhole; 331 if (n->nud_state & NUD_VALID) 332 n->nud_state = NUD_NOARP; 333 else 334 n->nud_state = NUD_NONE; 335 neigh_dbg(2, "neigh %p is stray\n", n); 336 } 337 write_unlock(&n->lock); 338 neigh_cleanup_and_release(n); 339 } 340 } 341 } 342 343 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) 344 { 345 write_lock_bh(&tbl->lock); 346 neigh_flush_dev(tbl, dev, false); 347 write_unlock_bh(&tbl->lock); 348 } 349 EXPORT_SYMBOL(neigh_changeaddr); 350 351 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, 352 bool skip_perm) 353 { 354 write_lock_bh(&tbl->lock); 355 neigh_flush_dev(tbl, dev, skip_perm); 356 pneigh_ifdown_and_unlock(tbl, dev); 357 358 del_timer_sync(&tbl->proxy_timer); 359 pneigh_queue_purge(&tbl->proxy_queue); 360 return 0; 361 } 362 363 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev) 364 { 365 __neigh_ifdown(tbl, dev, true); 366 return 0; 367 } 368 EXPORT_SYMBOL(neigh_carrier_down); 369 370 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) 371 { 372 __neigh_ifdown(tbl, dev, false); 373 return 0; 374 } 375 EXPORT_SYMBOL(neigh_ifdown); 376 377 static struct neighbour *neigh_alloc(struct neigh_table *tbl, 378 struct net_device *dev, 379 bool exempt_from_gc) 380 { 381 struct neighbour *n = NULL; 382 unsigned long now = jiffies; 383 int entries; 384 385 if (exempt_from_gc) 386 goto do_alloc; 387 388 entries = atomic_inc_return(&tbl->gc_entries) - 1; 389 if (entries >= tbl->gc_thresh3 || 390 (entries >= tbl->gc_thresh2 && 391 time_after(now, tbl->last_flush + 5 * HZ))) { 392 if (!neigh_forced_gc(tbl) && 393 entries >= tbl->gc_thresh3) { 394 net_info_ratelimited("%s: neighbor table overflow!\n", 395 tbl->id); 396 NEIGH_CACHE_STAT_INC(tbl, table_fulls); 397 goto out_entries; 398 } 399 } 400 401 do_alloc: 402 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); 403 
if (!n) 404 goto out_entries; 405 406 __skb_queue_head_init(&n->arp_queue); 407 rwlock_init(&n->lock); 408 seqlock_init(&n->ha_lock); 409 n->updated = n->used = now; 410 n->nud_state = NUD_NONE; 411 n->output = neigh_blackhole; 412 seqlock_init(&n->hh.hh_lock); 413 n->parms = neigh_parms_clone(&tbl->parms); 414 timer_setup(&n->timer, neigh_timer_handler, 0); 415 416 NEIGH_CACHE_STAT_INC(tbl, allocs); 417 n->tbl = tbl; 418 refcount_set(&n->refcnt, 1); 419 n->dead = 1; 420 INIT_LIST_HEAD(&n->gc_list); 421 422 atomic_inc(&tbl->entries); 423 out: 424 return n; 425 426 out_entries: 427 if (!exempt_from_gc) 428 atomic_dec(&tbl->gc_entries); 429 goto out; 430 } 431 432 static void neigh_get_hash_rnd(u32 *x) 433 { 434 *x = get_random_u32() | 1; 435 } 436 437 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) 438 { 439 size_t size = (1 << shift) * sizeof(struct neighbour *); 440 struct neigh_hash_table *ret; 441 struct neighbour __rcu **buckets; 442 int i; 443 444 ret = kmalloc(sizeof(*ret), GFP_ATOMIC); 445 if (!ret) 446 return NULL; 447 if (size <= PAGE_SIZE) { 448 buckets = kzalloc(size, GFP_ATOMIC); 449 } else { 450 buckets = (struct neighbour __rcu **) 451 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 452 get_order(size)); 453 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); 454 } 455 if (!buckets) { 456 kfree(ret); 457 return NULL; 458 } 459 ret->hash_buckets = buckets; 460 ret->hash_shift = shift; 461 for (i = 0; i < NEIGH_NUM_HASH_RND; i++) 462 neigh_get_hash_rnd(&ret->hash_rnd[i]); 463 return ret; 464 } 465 466 static void neigh_hash_free_rcu(struct rcu_head *head) 467 { 468 struct neigh_hash_table *nht = container_of(head, 469 struct neigh_hash_table, 470 rcu); 471 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); 472 struct neighbour __rcu **buckets = nht->hash_buckets; 473 474 if (size <= PAGE_SIZE) { 475 kfree(buckets); 476 } else { 477 kmemleak_free(buckets); 478 free_pages((unsigned long)buckets, get_order(size)); 479 } 480 
kfree(nht); 481 } 482 483 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, 484 unsigned long new_shift) 485 { 486 unsigned int i, hash; 487 struct neigh_hash_table *new_nht, *old_nht; 488 489 NEIGH_CACHE_STAT_INC(tbl, hash_grows); 490 491 old_nht = rcu_dereference_protected(tbl->nht, 492 lockdep_is_held(&tbl->lock)); 493 new_nht = neigh_hash_alloc(new_shift); 494 if (!new_nht) 495 return old_nht; 496 497 for (i = 0; i < (1 << old_nht->hash_shift); i++) { 498 struct neighbour *n, *next; 499 500 for (n = rcu_dereference_protected(old_nht->hash_buckets[i], 501 lockdep_is_held(&tbl->lock)); 502 n != NULL; 503 n = next) { 504 hash = tbl->hash(n->primary_key, n->dev, 505 new_nht->hash_rnd); 506 507 hash >>= (32 - new_nht->hash_shift); 508 next = rcu_dereference_protected(n->next, 509 lockdep_is_held(&tbl->lock)); 510 511 rcu_assign_pointer(n->next, 512 rcu_dereference_protected( 513 new_nht->hash_buckets[hash], 514 lockdep_is_held(&tbl->lock))); 515 rcu_assign_pointer(new_nht->hash_buckets[hash], n); 516 } 517 } 518 519 rcu_assign_pointer(tbl->nht, new_nht); 520 call_rcu(&old_nht->rcu, neigh_hash_free_rcu); 521 return new_nht; 522 } 523 524 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, 525 struct net_device *dev) 526 { 527 struct neighbour *n; 528 529 NEIGH_CACHE_STAT_INC(tbl, lookups); 530 531 rcu_read_lock_bh(); 532 n = __neigh_lookup_noref(tbl, pkey, dev); 533 if (n) { 534 if (!refcount_inc_not_zero(&n->refcnt)) 535 n = NULL; 536 NEIGH_CACHE_STAT_INC(tbl, hits); 537 } 538 539 rcu_read_unlock_bh(); 540 return n; 541 } 542 EXPORT_SYMBOL(neigh_lookup); 543 544 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, 545 const void *pkey) 546 { 547 struct neighbour *n; 548 unsigned int key_len = tbl->key_len; 549 u32 hash_val; 550 struct neigh_hash_table *nht; 551 552 NEIGH_CACHE_STAT_INC(tbl, lookups); 553 554 rcu_read_lock_bh(); 555 nht = rcu_dereference_bh(tbl->nht); 556 hash_val = 
tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); 557 558 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); 559 n != NULL; 560 n = rcu_dereference_bh(n->next)) { 561 if (!memcmp(n->primary_key, pkey, key_len) && 562 net_eq(dev_net(n->dev), net)) { 563 if (!refcount_inc_not_zero(&n->refcnt)) 564 n = NULL; 565 NEIGH_CACHE_STAT_INC(tbl, hits); 566 break; 567 } 568 } 569 570 rcu_read_unlock_bh(); 571 return n; 572 } 573 EXPORT_SYMBOL(neigh_lookup_nodev); 574 575 static struct neighbour *___neigh_create(struct neigh_table *tbl, 576 const void *pkey, 577 struct net_device *dev, 578 bool exempt_from_gc, bool want_ref) 579 { 580 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); 581 u32 hash_val; 582 unsigned int key_len = tbl->key_len; 583 int error; 584 struct neigh_hash_table *nht; 585 586 if (!n) { 587 rc = ERR_PTR(-ENOBUFS); 588 goto out; 589 } 590 591 memcpy(n->primary_key, pkey, key_len); 592 n->dev = dev; 593 dev_hold(dev); 594 595 /* Protocol specific setup. */ 596 if (tbl->constructor && (error = tbl->constructor(n)) < 0) { 597 rc = ERR_PTR(error); 598 goto out_neigh_release; 599 } 600 601 if (dev->netdev_ops->ndo_neigh_construct) { 602 error = dev->netdev_ops->ndo_neigh_construct(dev, n); 603 if (error < 0) { 604 rc = ERR_PTR(error); 605 goto out_neigh_release; 606 } 607 } 608 609 /* Device specific setup. 
*/ 610 if (n->parms->neigh_setup && 611 (error = n->parms->neigh_setup(n)) < 0) { 612 rc = ERR_PTR(error); 613 goto out_neigh_release; 614 } 615 616 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); 617 618 write_lock_bh(&tbl->lock); 619 nht = rcu_dereference_protected(tbl->nht, 620 lockdep_is_held(&tbl->lock)); 621 622 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) 623 nht = neigh_hash_grow(tbl, nht->hash_shift + 1); 624 625 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); 626 627 if (n->parms->dead) { 628 rc = ERR_PTR(-EINVAL); 629 goto out_tbl_unlock; 630 } 631 632 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], 633 lockdep_is_held(&tbl->lock)); 634 n1 != NULL; 635 n1 = rcu_dereference_protected(n1->next, 636 lockdep_is_held(&tbl->lock))) { 637 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { 638 if (want_ref) 639 neigh_hold(n1); 640 rc = n1; 641 goto out_tbl_unlock; 642 } 643 } 644 645 n->dead = 0; 646 if (!exempt_from_gc) 647 list_add_tail(&n->gc_list, &n->tbl->gc_list); 648 649 if (want_ref) 650 neigh_hold(n); 651 rcu_assign_pointer(n->next, 652 rcu_dereference_protected(nht->hash_buckets[hash_val], 653 lockdep_is_held(&tbl->lock))); 654 rcu_assign_pointer(nht->hash_buckets[hash_val], n); 655 write_unlock_bh(&tbl->lock); 656 neigh_dbg(2, "neigh %p is created\n", n); 657 rc = n; 658 out: 659 return rc; 660 out_tbl_unlock: 661 write_unlock_bh(&tbl->lock); 662 out_neigh_release: 663 if (!exempt_from_gc) 664 atomic_dec(&tbl->gc_entries); 665 neigh_release(n); 666 goto out; 667 } 668 669 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, 670 struct net_device *dev, bool want_ref) 671 { 672 return ___neigh_create(tbl, pkey, dev, false, want_ref); 673 } 674 EXPORT_SYMBOL(__neigh_create); 675 676 static u32 pneigh_hash(const void *pkey, unsigned int key_len) 677 { 678 u32 hash_val = *(u32 *)(pkey + key_len - 4); 679 hash_val ^= 
(hash_val >> 16); 680 hash_val ^= hash_val >> 8; 681 hash_val ^= hash_val >> 4; 682 hash_val &= PNEIGH_HASHMASK; 683 return hash_val; 684 } 685 686 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, 687 struct net *net, 688 const void *pkey, 689 unsigned int key_len, 690 struct net_device *dev) 691 { 692 while (n) { 693 if (!memcmp(n->key, pkey, key_len) && 694 net_eq(pneigh_net(n), net) && 695 (n->dev == dev || !n->dev)) 696 return n; 697 n = n->next; 698 } 699 return NULL; 700 } 701 702 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, 703 struct net *net, const void *pkey, struct net_device *dev) 704 { 705 unsigned int key_len = tbl->key_len; 706 u32 hash_val = pneigh_hash(pkey, key_len); 707 708 return __pneigh_lookup_1(tbl->phash_buckets[hash_val], 709 net, pkey, key_len, dev); 710 } 711 EXPORT_SYMBOL_GPL(__pneigh_lookup); 712 713 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, 714 struct net *net, const void *pkey, 715 struct net_device *dev, int creat) 716 { 717 struct pneigh_entry *n; 718 unsigned int key_len = tbl->key_len; 719 u32 hash_val = pneigh_hash(pkey, key_len); 720 721 read_lock_bh(&tbl->lock); 722 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val], 723 net, pkey, key_len, dev); 724 read_unlock_bh(&tbl->lock); 725 726 if (n || !creat) 727 goto out; 728 729 ASSERT_RTNL(); 730 731 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); 732 if (!n) 733 goto out; 734 735 n->protocol = 0; 736 write_pnet(&n->net, net); 737 memcpy(n->key, pkey, key_len); 738 n->dev = dev; 739 if (dev) 740 dev_hold(dev); 741 742 if (tbl->pconstructor && tbl->pconstructor(n)) { 743 if (dev) 744 dev_put(dev); 745 kfree(n); 746 n = NULL; 747 goto out; 748 } 749 750 write_lock_bh(&tbl->lock); 751 n->next = tbl->phash_buckets[hash_val]; 752 tbl->phash_buckets[hash_val] = n; 753 write_unlock_bh(&tbl->lock); 754 out: 755 return n; 756 } 757 EXPORT_SYMBOL(pneigh_lookup); 758 759 760 int pneigh_delete(struct neigh_table *tbl, struct net *net, 
const void *pkey, 761 struct net_device *dev) 762 { 763 struct pneigh_entry *n, **np; 764 unsigned int key_len = tbl->key_len; 765 u32 hash_val = pneigh_hash(pkey, key_len); 766 767 write_lock_bh(&tbl->lock); 768 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; 769 np = &n->next) { 770 if (!memcmp(n->key, pkey, key_len) && n->dev == dev && 771 net_eq(pneigh_net(n), net)) { 772 *np = n->next; 773 write_unlock_bh(&tbl->lock); 774 if (tbl->pdestructor) 775 tbl->pdestructor(n); 776 if (n->dev) 777 dev_put(n->dev); 778 kfree(n); 779 return 0; 780 } 781 } 782 write_unlock_bh(&tbl->lock); 783 return -ENOENT; 784 } 785 786 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, 787 struct net_device *dev) 788 { 789 struct pneigh_entry *n, **np, *freelist = NULL; 790 u32 h; 791 792 for (h = 0; h <= PNEIGH_HASHMASK; h++) { 793 np = &tbl->phash_buckets[h]; 794 while ((n = *np) != NULL) { 795 if (!dev || n->dev == dev) { 796 *np = n->next; 797 n->next = freelist; 798 freelist = n; 799 continue; 800 } 801 np = &n->next; 802 } 803 } 804 write_unlock_bh(&tbl->lock); 805 while ((n = freelist)) { 806 freelist = n->next; 807 n->next = NULL; 808 if (tbl->pdestructor) 809 tbl->pdestructor(n); 810 if (n->dev) 811 dev_put(n->dev); 812 kfree(n); 813 } 814 return -ENOENT; 815 } 816 817 static void neigh_parms_destroy(struct neigh_parms *parms); 818 819 static inline void neigh_parms_put(struct neigh_parms *parms) 820 { 821 if (refcount_dec_and_test(&parms->refcnt)) 822 neigh_parms_destroy(parms); 823 } 824 825 /* 826 * neighbour must already be out of the table; 827 * 828 */ 829 void neigh_destroy(struct neighbour *neigh) 830 { 831 struct net_device *dev = neigh->dev; 832 833 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); 834 835 if (!neigh->dead) { 836 pr_warn("Destroying alive neighbour %p\n", neigh); 837 dump_stack(); 838 return; 839 } 840 841 if (neigh_del_timer(neigh)) 842 pr_warn("Impossible event\n"); 843 844 write_lock_bh(&neigh->lock); 845 
__skb_queue_purge(&neigh->arp_queue); 846 write_unlock_bh(&neigh->lock); 847 neigh->arp_queue_len_bytes = 0; 848 849 if (dev->netdev_ops->ndo_neigh_destroy) 850 dev->netdev_ops->ndo_neigh_destroy(dev, neigh); 851 852 dev_put(dev); 853 neigh_parms_put(neigh->parms); 854 855 neigh_dbg(2, "neigh %p is destroyed\n", neigh); 856 857 atomic_dec(&neigh->tbl->entries); 858 kfree_rcu(neigh, rcu); 859 } 860 EXPORT_SYMBOL(neigh_destroy); 861 862 /* Neighbour state is suspicious; 863 disable fast path. 864 865 Called with write_locked neigh. 866 */ 867 static void neigh_suspect(struct neighbour *neigh) 868 { 869 neigh_dbg(2, "neigh %p is suspected\n", neigh); 870 871 neigh->output = neigh->ops->output; 872 } 873 874 /* Neighbour state is OK; 875 enable fast path. 876 877 Called with write_locked neigh. 878 */ 879 static void neigh_connect(struct neighbour *neigh) 880 { 881 neigh_dbg(2, "neigh %p is connected\n", neigh); 882 883 neigh->output = neigh->ops->connected_output; 884 } 885 886 static void neigh_periodic_work(struct work_struct *work) 887 { 888 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); 889 struct neighbour *n; 890 struct neighbour __rcu **np; 891 unsigned int i; 892 struct neigh_hash_table *nht; 893 894 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); 895 896 write_lock_bh(&tbl->lock); 897 nht = rcu_dereference_protected(tbl->nht, 898 lockdep_is_held(&tbl->lock)); 899 900 /* 901 * periodically recompute ReachableTime from random function 902 */ 903 904 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { 905 struct neigh_parms *p; 906 tbl->last_rand = jiffies; 907 list_for_each_entry(p, &tbl->parms_list, list) 908 p->reachable_time = 909 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 910 } 911 912 if (atomic_read(&tbl->entries) < tbl->gc_thresh1) 913 goto out; 914 915 for (i = 0 ; i < (1 << nht->hash_shift); i++) { 916 np = &nht->hash_buckets[i]; 917 918 while ((n = rcu_dereference_protected(*np, 919 
lockdep_is_held(&tbl->lock))) != NULL) { 920 unsigned int state; 921 922 write_lock(&n->lock); 923 924 state = n->nud_state; 925 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || 926 (n->flags & NTF_EXT_LEARNED)) { 927 write_unlock(&n->lock); 928 goto next_elt; 929 } 930 931 if (time_before(n->used, n->confirmed)) 932 n->used = n->confirmed; 933 934 if (refcount_read(&n->refcnt) == 1 && 935 (state == NUD_FAILED || 936 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { 937 *np = n->next; 938 neigh_mark_dead(n); 939 write_unlock(&n->lock); 940 neigh_cleanup_and_release(n); 941 continue; 942 } 943 write_unlock(&n->lock); 944 945 next_elt: 946 np = &n->next; 947 } 948 /* 949 * It's fine to release lock here, even if hash table 950 * grows while we are preempted. 951 */ 952 write_unlock_bh(&tbl->lock); 953 cond_resched(); 954 write_lock_bh(&tbl->lock); 955 nht = rcu_dereference_protected(tbl->nht, 956 lockdep_is_held(&tbl->lock)); 957 } 958 out: 959 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. 960 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 961 * BASE_REACHABLE_TIME. 962 */ 963 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, 964 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); 965 write_unlock_bh(&tbl->lock); 966 } 967 968 static __inline__ int neigh_max_probes(struct neighbour *n) 969 { 970 struct neigh_parms *p = n->parms; 971 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + 972 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : 973 NEIGH_VAR(p, MCAST_PROBES)); 974 } 975 976 static void neigh_invalidate(struct neighbour *neigh) 977 __releases(neigh->lock) 978 __acquires(neigh->lock) 979 { 980 struct sk_buff *skb; 981 982 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); 983 neigh_dbg(2, "neigh %p is failed\n", neigh); 984 neigh->updated = jiffies; 985 986 /* It is very thin place. report_unreachable is very complicated 987 routine. 
Particularly, it can hit the same neighbour entry! 988 989 So that, we try to be accurate and avoid dead loop. --ANK 990 */ 991 while (neigh->nud_state == NUD_FAILED && 992 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { 993 write_unlock(&neigh->lock); 994 neigh->ops->error_report(neigh, skb); 995 write_lock(&neigh->lock); 996 } 997 __skb_queue_purge(&neigh->arp_queue); 998 neigh->arp_queue_len_bytes = 0; 999 } 1000 1001 static void neigh_probe(struct neighbour *neigh) 1002 __releases(neigh->lock) 1003 { 1004 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); 1005 /* keep skb alive even if arp_queue overflows */ 1006 if (skb) 1007 skb = skb_clone(skb, GFP_ATOMIC); 1008 write_unlock(&neigh->lock); 1009 if (neigh->ops->solicit) 1010 neigh->ops->solicit(neigh, skb); 1011 atomic_inc(&neigh->probes); 1012 consume_skb(skb); 1013 } 1014 1015 /* Called when a timer expires for a neighbour entry. */ 1016 1017 static void neigh_timer_handler(struct timer_list *t) 1018 { 1019 unsigned long now, next; 1020 struct neighbour *neigh = from_timer(neigh, t, timer); 1021 unsigned int state; 1022 int notify = 0; 1023 1024 write_lock(&neigh->lock); 1025 1026 state = neigh->nud_state; 1027 now = jiffies; 1028 next = now + HZ; 1029 1030 if (!(state & NUD_IN_TIMER)) 1031 goto out; 1032 1033 if (state & NUD_REACHABLE) { 1034 if (time_before_eq(now, 1035 neigh->confirmed + neigh->parms->reachable_time)) { 1036 neigh_dbg(2, "neigh %p is still alive\n", neigh); 1037 next = neigh->confirmed + neigh->parms->reachable_time; 1038 } else if (time_before_eq(now, 1039 neigh->used + 1040 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { 1041 neigh_dbg(2, "neigh %p is delayed\n", neigh); 1042 neigh->nud_state = NUD_DELAY; 1043 neigh->updated = jiffies; 1044 neigh_suspect(neigh); 1045 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); 1046 } else { 1047 neigh_dbg(2, "neigh %p is suspected\n", neigh); 1048 neigh->nud_state = NUD_STALE; 1049 neigh->updated = jiffies; 1050 neigh_suspect(neigh); 
1051 notify = 1; 1052 } 1053 } else if (state & NUD_DELAY) { 1054 if (time_before_eq(now, 1055 neigh->confirmed + 1056 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { 1057 neigh_dbg(2, "neigh %p is now reachable\n", neigh); 1058 neigh->nud_state = NUD_REACHABLE; 1059 neigh->updated = jiffies; 1060 neigh_connect(neigh); 1061 notify = 1; 1062 next = neigh->confirmed + neigh->parms->reachable_time; 1063 } else { 1064 neigh_dbg(2, "neigh %p is probed\n", neigh); 1065 neigh->nud_state = NUD_PROBE; 1066 neigh->updated = jiffies; 1067 atomic_set(&neigh->probes, 0); 1068 notify = 1; 1069 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); 1070 } 1071 } else { 1072 /* NUD_PROBE|NUD_INCOMPLETE */ 1073 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); 1074 } 1075 1076 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && 1077 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { 1078 neigh->nud_state = NUD_FAILED; 1079 notify = 1; 1080 neigh_invalidate(neigh); 1081 goto out; 1082 } 1083 1084 if (neigh->nud_state & NUD_IN_TIMER) { 1085 if (time_before(next, jiffies + HZ/2)) 1086 next = jiffies + HZ/2; 1087 if (!mod_timer(&neigh->timer, next)) 1088 neigh_hold(neigh); 1089 } 1090 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { 1091 neigh_probe(neigh); 1092 } else { 1093 out: 1094 write_unlock(&neigh->lock); 1095 } 1096 1097 if (notify) 1098 neigh_update_notify(neigh, 0); 1099 1100 trace_neigh_timer_handler(neigh, 0); 1101 1102 neigh_release(neigh); 1103 } 1104 1105 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) 1106 { 1107 int rc; 1108 bool immediate_probe = false; 1109 1110 write_lock_bh(&neigh->lock); 1111 1112 rc = 0; 1113 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) 1114 goto out_unlock_bh; 1115 if (neigh->dead) 1116 goto out_dead; 1117 1118 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { 1119 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + 1120 NEIGH_VAR(neigh->parms, APP_PROBES)) { 1121 unsigned long next, now = 
jiffies; 1122 1123 atomic_set(&neigh->probes, 1124 NEIGH_VAR(neigh->parms, UCAST_PROBES)); 1125 neigh->nud_state = NUD_INCOMPLETE; 1126 neigh->updated = now; 1127 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), 1128 HZ/2); 1129 neigh_add_timer(neigh, next); 1130 immediate_probe = true; 1131 } else { 1132 neigh->nud_state = NUD_FAILED; 1133 neigh->updated = jiffies; 1134 write_unlock_bh(&neigh->lock); 1135 1136 kfree_skb(skb); 1137 return 1; 1138 } 1139 } else if (neigh->nud_state & NUD_STALE) { 1140 neigh_dbg(2, "neigh %p is delayed\n", neigh); 1141 neigh->nud_state = NUD_DELAY; 1142 neigh->updated = jiffies; 1143 neigh_add_timer(neigh, jiffies + 1144 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); 1145 } 1146 1147 if (neigh->nud_state == NUD_INCOMPLETE) { 1148 if (skb) { 1149 while (neigh->arp_queue_len_bytes + skb->truesize > 1150 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { 1151 struct sk_buff *buff; 1152 1153 buff = __skb_dequeue(&neigh->arp_queue); 1154 if (!buff) 1155 break; 1156 neigh->arp_queue_len_bytes -= buff->truesize; 1157 kfree_skb(buff); 1158 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); 1159 } 1160 skb_dst_force(skb); 1161 __skb_queue_tail(&neigh->arp_queue, skb); 1162 neigh->arp_queue_len_bytes += skb->truesize; 1163 } 1164 rc = 1; 1165 } 1166 out_unlock_bh: 1167 if (immediate_probe) 1168 neigh_probe(neigh); 1169 else 1170 write_unlock(&neigh->lock); 1171 local_bh_enable(); 1172 trace_neigh_event_send_done(neigh, rc); 1173 return rc; 1174 1175 out_dead: 1176 if (neigh->nud_state & NUD_STALE) 1177 goto out_unlock_bh; 1178 write_unlock_bh(&neigh->lock); 1179 kfree_skb(skb); 1180 trace_neigh_event_send_dead(neigh, 1); 1181 return 1; 1182 } 1183 EXPORT_SYMBOL(__neigh_event_send); 1184 1185 static void neigh_update_hhs(struct neighbour *neigh) 1186 { 1187 struct hh_cache *hh; 1188 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 1189 = NULL; 1190 1191 if (neigh->dev->header_ops) 1192 update = 
neigh->dev->header_ops->cache_update; 1193 1194 if (update) { 1195 hh = &neigh->hh; 1196 if (hh->hh_len) { 1197 write_seqlock_bh(&hh->hh_lock); 1198 update(hh, neigh->dev, neigh->ha); 1199 write_sequnlock_bh(&hh->hh_lock); 1200 } 1201 } 1202 } 1203 1204 1205 1206 /* Generic update routine. 1207 -- lladdr is new lladdr or NULL, if it is not supplied. 1208 -- new is new state. 1209 -- flags 1210 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, 1211 if it is different. 1212 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" 1213 lladdr instead of overriding it 1214 if it is different. 1215 NEIGH_UPDATE_F_ADMIN means that the change is administrative. 1216 1217 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing 1218 NTF_ROUTER flag. 1219 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as 1220 a router. 1221 1222 Caller MUST hold reference count on the entry. 1223 */ 1224 1225 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, 1226 u8 new, u32 flags, u32 nlmsg_pid, 1227 struct netlink_ext_ack *extack) 1228 { 1229 bool ext_learn_change = false; 1230 u8 old; 1231 int err; 1232 int notify = 0; 1233 struct net_device *dev; 1234 int update_isrouter = 0; 1235 1236 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); 1237 1238 write_lock_bh(&neigh->lock); 1239 1240 dev = neigh->dev; 1241 old = neigh->nud_state; 1242 err = -EPERM; 1243 1244 if (!(flags & NEIGH_UPDATE_F_ADMIN) && 1245 (old & (NUD_NOARP | NUD_PERMANENT))) 1246 goto out; 1247 if (neigh->dead) { 1248 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); 1249 goto out; 1250 } 1251 1252 ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); 1253 1254 if (!(new & NUD_VALID)) { 1255 neigh_del_timer(neigh); 1256 if (old & NUD_CONNECTED) 1257 neigh_suspect(neigh); 1258 neigh->nud_state = new; 1259 err = 0; 1260 notify = old & NUD_VALID; 1261 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && 1262 (new & NUD_FAILED)) { 1263 neigh_invalidate(neigh); 
1264 notify = 1; 1265 } 1266 goto out; 1267 } 1268 1269 /* Compare new lladdr with cached one */ 1270 if (!dev->addr_len) { 1271 /* First case: device needs no address. */ 1272 lladdr = neigh->ha; 1273 } else if (lladdr) { 1274 /* The second case: if something is already cached 1275 and a new address is proposed: 1276 - compare new & old 1277 - if they are different, check override flag 1278 */ 1279 if ((old & NUD_VALID) && 1280 !memcmp(lladdr, neigh->ha, dev->addr_len)) 1281 lladdr = neigh->ha; 1282 } else { 1283 /* No address is supplied; if we know something, 1284 use it, otherwise discard the request. 1285 */ 1286 err = -EINVAL; 1287 if (!(old & NUD_VALID)) { 1288 NL_SET_ERR_MSG(extack, "No link layer address given"); 1289 goto out; 1290 } 1291 lladdr = neigh->ha; 1292 } 1293 1294 /* Update confirmed timestamp for neighbour entry after we 1295 * received ARP packet even if it doesn't change IP to MAC binding. 1296 */ 1297 if (new & NUD_CONNECTED) 1298 neigh->confirmed = jiffies; 1299 1300 /* If entry was valid and address is not changed, 1301 do not change entry state, if new one is STALE. 1302 */ 1303 err = 0; 1304 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; 1305 if (old & NUD_VALID) { 1306 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { 1307 update_isrouter = 0; 1308 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && 1309 (old & NUD_CONNECTED)) { 1310 lladdr = neigh->ha; 1311 new = NUD_STALE; 1312 } else 1313 goto out; 1314 } else { 1315 if (lladdr == neigh->ha && new == NUD_STALE && 1316 !(flags & NEIGH_UPDATE_F_ADMIN)) 1317 new = old; 1318 } 1319 } 1320 1321 /* Update timestamp only once we know we will make a change to the 1322 * neighbour entry. Otherwise we risk to move the locktime window with 1323 * noop updates and ignore relevant ARP updates. 
1324 */ 1325 if (new != old || lladdr != neigh->ha) 1326 neigh->updated = jiffies; 1327 1328 if (new != old) { 1329 neigh_del_timer(neigh); 1330 if (new & NUD_PROBE) 1331 atomic_set(&neigh->probes, 0); 1332 if (new & NUD_IN_TIMER) 1333 neigh_add_timer(neigh, (jiffies + 1334 ((new & NUD_REACHABLE) ? 1335 neigh->parms->reachable_time : 1336 0))); 1337 neigh->nud_state = new; 1338 notify = 1; 1339 } 1340 1341 if (lladdr != neigh->ha) { 1342 write_seqlock(&neigh->ha_lock); 1343 memcpy(&neigh->ha, lladdr, dev->addr_len); 1344 write_sequnlock(&neigh->ha_lock); 1345 neigh_update_hhs(neigh); 1346 if (!(new & NUD_CONNECTED)) 1347 neigh->confirmed = jiffies - 1348 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); 1349 notify = 1; 1350 } 1351 if (new == old) 1352 goto out; 1353 if (new & NUD_CONNECTED) 1354 neigh_connect(neigh); 1355 else 1356 neigh_suspect(neigh); 1357 if (!(old & NUD_VALID)) { 1358 struct sk_buff *skb; 1359 1360 /* Again: avoid dead loop if something went wrong */ 1361 1362 while (neigh->nud_state & NUD_VALID && 1363 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { 1364 struct dst_entry *dst = skb_dst(skb); 1365 struct neighbour *n2, *n1 = neigh; 1366 write_unlock_bh(&neigh->lock); 1367 1368 rcu_read_lock(); 1369 1370 /* Why not just use 'neigh' as-is? The problem is that 1371 * things such as shaper, eql, and sch_teql can end up 1372 * using alternative, different, neigh objects to output 1373 * the packet in the output path. So what we need to do 1374 * here is re-lookup the top-level neigh in the path so 1375 * we can reinject the packet there. 
1376 */ 1377 n2 = NULL; 1378 if (dst) { 1379 n2 = dst_neigh_lookup_skb(dst, skb); 1380 if (n2) 1381 n1 = n2; 1382 } 1383 n1->output(n1, skb); 1384 if (n2) 1385 neigh_release(n2); 1386 rcu_read_unlock(); 1387 1388 write_lock_bh(&neigh->lock); 1389 } 1390 __skb_queue_purge(&neigh->arp_queue); 1391 neigh->arp_queue_len_bytes = 0; 1392 } 1393 out: 1394 if (update_isrouter) 1395 neigh_update_is_router(neigh, flags, ¬ify); 1396 write_unlock_bh(&neigh->lock); 1397 1398 if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) 1399 neigh_update_gc_list(neigh); 1400 1401 if (notify) 1402 neigh_update_notify(neigh, nlmsg_pid); 1403 1404 trace_neigh_update_done(neigh, err); 1405 1406 return err; 1407 } 1408 1409 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, 1410 u32 flags, u32 nlmsg_pid) 1411 { 1412 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL); 1413 } 1414 EXPORT_SYMBOL(neigh_update); 1415 1416 /* Update the neigh to listen temporarily for probe responses, even if it is 1417 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. 
1418 */ 1419 void __neigh_set_probe_once(struct neighbour *neigh) 1420 { 1421 if (neigh->dead) 1422 return; 1423 neigh->updated = jiffies; 1424 if (!(neigh->nud_state & NUD_FAILED)) 1425 return; 1426 neigh->nud_state = NUD_INCOMPLETE; 1427 atomic_set(&neigh->probes, neigh_max_probes(neigh)); 1428 neigh_add_timer(neigh, 1429 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); 1430 } 1431 EXPORT_SYMBOL(__neigh_set_probe_once); 1432 1433 struct neighbour *neigh_event_ns(struct neigh_table *tbl, 1434 u8 *lladdr, void *saddr, 1435 struct net_device *dev) 1436 { 1437 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, 1438 lladdr || !dev->addr_len); 1439 if (neigh) 1440 neigh_update(neigh, lladdr, NUD_STALE, 1441 NEIGH_UPDATE_F_OVERRIDE, 0); 1442 return neigh; 1443 } 1444 EXPORT_SYMBOL(neigh_event_ns); 1445 1446 /* called with read_lock_bh(&n->lock); */ 1447 static void neigh_hh_init(struct neighbour *n) 1448 { 1449 struct net_device *dev = n->dev; 1450 __be16 prot = n->tbl->protocol; 1451 struct hh_cache *hh = &n->hh; 1452 1453 write_lock_bh(&n->lock); 1454 1455 /* Only one thread can come in here and initialize the 1456 * hh_cache entry. 1457 */ 1458 if (!hh->hh_len) 1459 dev->header_ops->cache(n, hh, prot); 1460 1461 write_unlock_bh(&n->lock); 1462 } 1463 1464 /* Slow and careful. 
*/ 1465 1466 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) 1467 { 1468 int rc = 0; 1469 1470 if (!neigh_event_send(neigh, skb)) { 1471 int err; 1472 struct net_device *dev = neigh->dev; 1473 unsigned int seq; 1474 1475 if (dev->header_ops->cache && !neigh->hh.hh_len) 1476 neigh_hh_init(neigh); 1477 1478 do { 1479 __skb_pull(skb, skb_network_offset(skb)); 1480 seq = read_seqbegin(&neigh->ha_lock); 1481 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1482 neigh->ha, NULL, skb->len); 1483 } while (read_seqretry(&neigh->ha_lock, seq)); 1484 1485 if (err >= 0) 1486 rc = dev_queue_xmit(skb); 1487 else 1488 goto out_kfree_skb; 1489 } 1490 out: 1491 return rc; 1492 out_kfree_skb: 1493 rc = -EINVAL; 1494 kfree_skb(skb); 1495 goto out; 1496 } 1497 EXPORT_SYMBOL(neigh_resolve_output); 1498 1499 /* As fast as possible without hh cache */ 1500 1501 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) 1502 { 1503 struct net_device *dev = neigh->dev; 1504 unsigned int seq; 1505 int err; 1506 1507 do { 1508 __skb_pull(skb, skb_network_offset(skb)); 1509 seq = read_seqbegin(&neigh->ha_lock); 1510 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1511 neigh->ha, NULL, skb->len); 1512 } while (read_seqretry(&neigh->ha_lock, seq)); 1513 1514 if (err >= 0) 1515 err = dev_queue_xmit(skb); 1516 else { 1517 err = -EINVAL; 1518 kfree_skb(skb); 1519 } 1520 return err; 1521 } 1522 EXPORT_SYMBOL(neigh_connected_output); 1523 1524 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) 1525 { 1526 return dev_queue_xmit(skb); 1527 } 1528 EXPORT_SYMBOL(neigh_direct_output); 1529 1530 static void neigh_proxy_process(struct timer_list *t) 1531 { 1532 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); 1533 long sched_next = 0; 1534 unsigned long now = jiffies; 1535 struct sk_buff *skb, *n; 1536 1537 spin_lock(&tbl->proxy_queue.lock); 1538 1539 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { 1540 long tdif = 
NEIGH_CB(skb)->sched_next - now; 1541 1542 if (tdif <= 0) { 1543 struct net_device *dev = skb->dev; 1544 1545 __skb_unlink(skb, &tbl->proxy_queue); 1546 if (tbl->proxy_redo && netif_running(dev)) { 1547 rcu_read_lock(); 1548 tbl->proxy_redo(skb); 1549 rcu_read_unlock(); 1550 } else { 1551 kfree_skb(skb); 1552 } 1553 1554 dev_put(dev); 1555 } else if (!sched_next || tdif < sched_next) 1556 sched_next = tdif; 1557 } 1558 del_timer(&tbl->proxy_timer); 1559 if (sched_next) 1560 mod_timer(&tbl->proxy_timer, jiffies + sched_next); 1561 spin_unlock(&tbl->proxy_queue.lock); 1562 } 1563 1564 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, 1565 struct sk_buff *skb) 1566 { 1567 unsigned long now = jiffies; 1568 1569 unsigned long sched_next = now + (prandom_u32() % 1570 NEIGH_VAR(p, PROXY_DELAY)); 1571 1572 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { 1573 kfree_skb(skb); 1574 return; 1575 } 1576 1577 NEIGH_CB(skb)->sched_next = sched_next; 1578 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; 1579 1580 spin_lock(&tbl->proxy_queue.lock); 1581 if (del_timer(&tbl->proxy_timer)) { 1582 if (time_before(tbl->proxy_timer.expires, sched_next)) 1583 sched_next = tbl->proxy_timer.expires; 1584 } 1585 skb_dst_drop(skb); 1586 dev_hold(skb->dev); 1587 __skb_queue_tail(&tbl->proxy_queue, skb); 1588 mod_timer(&tbl->proxy_timer, sched_next); 1589 spin_unlock(&tbl->proxy_queue.lock); 1590 } 1591 EXPORT_SYMBOL(pneigh_enqueue); 1592 1593 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, 1594 struct net *net, int ifindex) 1595 { 1596 struct neigh_parms *p; 1597 1598 list_for_each_entry(p, &tbl->parms_list, list) { 1599 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || 1600 (!p->dev && !ifindex && net_eq(net, &init_net))) 1601 return p; 1602 } 1603 1604 return NULL; 1605 } 1606 1607 struct neigh_parms *neigh_parms_alloc(struct net_device *dev, 1608 struct neigh_table *tbl) 1609 { 1610 struct neigh_parms *p; 1611 
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		/* The parms keep a device reference until released. */
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		/* Let the driver adjust or veto the new parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

/* RCU callback: drop a reference on the parms once the grace period ended. */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

/*
 * Unlink parms from the table, mark them dead, drop their device reference
 * and schedule neigh_rcu_free_parms() via call_rcu().  A NULL pointer and
 * the table's own default parms are ignored.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

/* Free the memory of a parms structure. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

/* Lock class passed to skb_queue_head_init_class() for every proxy_queue. */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Registered tables, indexed by the NEIGH_*_TABLE constants. */
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

/*
 * Initialise tbl and publish it in neigh_tables[index].
 * Allocation failures here are fatal: the function panics.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	/* The table's default parms are the first entry on parms_list. */
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	/* Derive the entry size from the key length unless the table set it. */
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	/* Publish last, after everything above is set up. */
	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

/*
 * Unregister tbl from neigh_tables[index] and release what
 * neigh_table_init() set up.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean...
Fix it to unload IPv6 module safely */ 1728 cancel_delayed_work_sync(&tbl->gc_work); 1729 del_timer_sync(&tbl->proxy_timer); 1730 pneigh_queue_purge(&tbl->proxy_queue); 1731 neigh_ifdown(tbl, NULL); 1732 if (atomic_read(&tbl->entries)) 1733 pr_crit("neighbour leakage\n"); 1734 1735 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, 1736 neigh_hash_free_rcu); 1737 tbl->nht = NULL; 1738 1739 kfree(tbl->phash_buckets); 1740 tbl->phash_buckets = NULL; 1741 1742 remove_proc_entry(tbl->id, init_net.proc_net_stat); 1743 1744 free_percpu(tbl->stats); 1745 tbl->stats = NULL; 1746 1747 return 0; 1748 } 1749 EXPORT_SYMBOL(neigh_table_clear); 1750 1751 static struct neigh_table *neigh_find_table(int family) 1752 { 1753 struct neigh_table *tbl = NULL; 1754 1755 switch (family) { 1756 case AF_INET: 1757 tbl = neigh_tables[NEIGH_ARP_TABLE]; 1758 break; 1759 case AF_INET6: 1760 tbl = neigh_tables[NEIGH_ND_TABLE]; 1761 break; 1762 case AF_DECnet: 1763 tbl = neigh_tables[NEIGH_DN_TABLE]; 1764 break; 1765 } 1766 1767 return tbl; 1768 } 1769 1770 const struct nla_policy nda_policy[NDA_MAX+1] = { 1771 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, 1772 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, 1773 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) }, 1774 [NDA_PROBES] = { .type = NLA_U32 }, 1775 [NDA_VLAN] = { .type = NLA_U16 }, 1776 [NDA_PORT] = { .type = NLA_U16 }, 1777 [NDA_VNI] = { .type = NLA_U32 }, 1778 [NDA_IFINDEX] = { .type = NLA_U32 }, 1779 [NDA_MASTER] = { .type = NLA_U32 }, 1780 [NDA_PROTOCOL] = { .type = NLA_U8 }, 1781 }; 1782 1783 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, 1784 struct netlink_ext_ack *extack) 1785 { 1786 struct net *net = sock_net(skb->sk); 1787 struct ndmsg *ndm; 1788 struct nlattr *dst_attr; 1789 struct neigh_table *tbl; 1790 struct neighbour *neigh; 1791 struct net_device *dev = NULL; 1792 int err = -EINVAL; 1793 1794 ASSERT_RTNL(); 1795 if (nlmsg_len(nlh) < sizeof(*ndm)) 1796 goto out; 1797 

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	/* Proxy entries are handled separately; dev may be NULL for them. */
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	/* A regular entry needs a device; err is still -EINVAL here. */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Force the entry to FAILED administratively, then unlink it. */
	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	/*
	 * NOTE(review): the lookup reference is dropped before
	 * neigh_remove_one() uses neigh; presumably the table's own reference
	 * keeps the entry alive while tbl->lock is held - confirm.
	 */
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

/*
 * Netlink handler that creates or updates a neighbour entry, or a proxy
 * entry when NTF_PROXY is set.  Asserts that RTNL is held.  Returns 0 or a
 * negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied lladdr must cover the device's address length. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	/* Optional per-table veto hook. */
	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		/* PERMANENT and externally learned entries are GC-exempt. */
		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* Without NLM_F_REPLACE an existing entry is not overridden. */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	/* NTF_USE only triggers resolution; the entry is not updated. */
	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	/* The protocol is stored regardless of the update result above. */
	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}

/*
 * Emit parms as a nested NDTA_PARMS attribute into skb.
 * Returns the result of nla_nest_end() on success, -ENOBUFS when the nest
 * cannot be started, and -EMSGSIZE (nest cancelled) when an attribute does
 * not fit.
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

/*
 * Build one netlink message describing tbl: name, GC settings, an
 * NDTA_CONFIG snapshot, NDTA_STATS summed over all possible CPUs, and the
 * table's default parms.  tbl->lock is read-held while the message is
 * filled.  Returns 0, or -EMSGSIZE with the partial message cancelled.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		/* Ages are reported relative to now, converted to msecs below. */
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len = tbl->key_len,
			.ndtc_entry_size = tbl->entry_size,
			.ndtc_entries = atomic_read(&tbl->entries),
			.ndtc_last_flush = jiffies_to_msecs(flush_delta),
			.ndtc_last_rand = jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
		};

		/* The hash table pointer is read under RCU (bh flavour). */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Fold the per-CPU counters into one total. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics *st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs += st->allocs;
			ndst.ndts_destroys += st->destroys;
			ndst.ndts_hash_grows += st->hash_grows;
			ndst.ndts_res_failed += st->res_failed;
			ndst.ndts_lookups += st->lookups;
			ndst.ndts_hits += st->hits;
			ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs += st->forced_gc_runs;
			ndst.ndts_table_fulls += st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The table's default parms must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Build one netlink message carrying only the table name and the given
 * parms.  tbl->lock is read-held while the message is filled.  Returns 0,
 * or -EMSGSIZE with the partial message cancelled.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Policy for the top-level NDTA_* attributes accepted by neightbl_set(). */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

/* Policy for the NDTPA_* attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

/*
 * Netlink handler that changes tunables of the table named by NDTA_NAME:
 * per-parms values from a nested NDTA_PARMS and the table-wide GC
 * thresholds and interval.  Returns 0 or a negative errno; -ENOENT when the
 * table or the parms cannot be found, and also when table-wide values are
 * set from outside init_net.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Match by name; a non-zero family in the header must match too. */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply each supplied attribute; unhandled ones are ignored. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count, stored as bytes. */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* NOTE(review): notifier chain runs with tbl->lock write-held. */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/*
	 * Table-wide GC values may only be changed from init_net.  Parms
	 * changes applied above are not undone when this check fails.
	 */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}

/*
 * Strict validation of a neighbour table dump request: the header must be
 * complete, its padding zero, and no attributes may follow it.
 * Returns 0 or -EINVAL with an extack message.
 */
static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

/*
 * Netlink dump callback for neighbour tables.  For every table matching the
 * requested family it emits one table message followed by one message per
 * parms entry belonging to the caller's namespace.  cb->args[0] and
 * cb->args[1] hold the table index and parms index at which a later call
 * resumes.  Returns skb->len, or a negative errno from strict validation.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* Start after tbl->parms, which neightbl_fill_info() covered. */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		/* The parms skip count only applies to the resumed table. */
		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

/*
 * Build one netlink message describing neigh: header fields, NDA_DST, the
 * link-layer address when the state is valid, probe count, cache info and
 * the optional protocol.  Returns 0, or -EMSGSIZE with the partial message
 * cancelled.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family =
neigh->ops->family; 2435 ndm->ndm_pad1 = 0; 2436 ndm->ndm_pad2 = 0; 2437 ndm->ndm_flags = neigh->flags; 2438 ndm->ndm_type = neigh->type; 2439 ndm->ndm_ifindex = neigh->dev->ifindex; 2440 2441 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) 2442 goto nla_put_failure; 2443 2444 read_lock_bh(&neigh->lock); 2445 ndm->ndm_state = neigh->nud_state; 2446 if (neigh->nud_state & NUD_VALID) { 2447 char haddr[MAX_ADDR_LEN]; 2448 2449 neigh_ha_snapshot(haddr, neigh, neigh->dev); 2450 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { 2451 read_unlock_bh(&neigh->lock); 2452 goto nla_put_failure; 2453 } 2454 } 2455 2456 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2457 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); 2458 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); 2459 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; 2460 read_unlock_bh(&neigh->lock); 2461 2462 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || 2463 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 2464 goto nla_put_failure; 2465 2466 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) 2467 goto nla_put_failure; 2468 2469 nlmsg_end(skb, nlh); 2470 return 0; 2471 2472 nla_put_failure: 2473 nlmsg_cancel(skb, nlh); 2474 return -EMSGSIZE; 2475 } 2476 2477 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, 2478 u32 pid, u32 seq, int type, unsigned int flags, 2479 struct neigh_table *tbl) 2480 { 2481 struct nlmsghdr *nlh; 2482 struct ndmsg *ndm; 2483 2484 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2485 if (nlh == NULL) 2486 return -EMSGSIZE; 2487 2488 ndm = nlmsg_data(nlh); 2489 ndm->ndm_family = tbl->family; 2490 ndm->ndm_pad1 = 0; 2491 ndm->ndm_pad2 = 0; 2492 ndm->ndm_flags = pn->flags | NTF_PROXY; 2493 ndm->ndm_type = RTN_UNICAST; 2494 ndm->ndm_ifindex = pn->dev ? 
pn->dev->ifindex : 0; 2495 ndm->ndm_state = NUD_NONE; 2496 2497 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) 2498 goto nla_put_failure; 2499 2500 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) 2501 goto nla_put_failure; 2502 2503 nlmsg_end(skb, nlh); 2504 return 0; 2505 2506 nla_put_failure: 2507 nlmsg_cancel(skb, nlh); 2508 return -EMSGSIZE; 2509 } 2510 2511 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) 2512 { 2513 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 2514 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); 2515 } 2516 2517 static bool neigh_master_filtered(struct net_device *dev, int master_idx) 2518 { 2519 struct net_device *master; 2520 2521 if (!master_idx) 2522 return false; 2523 2524 master = dev ? netdev_master_upper_dev_get(dev) : NULL; 2525 if (!master || master->ifindex != master_idx) 2526 return true; 2527 2528 return false; 2529 } 2530 2531 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) 2532 { 2533 if (filter_idx && (!dev || dev->ifindex != filter_idx)) 2534 return true; 2535 2536 return false; 2537 } 2538 2539 struct neigh_dump_filter { 2540 int master_idx; 2541 int dev_idx; 2542 }; 2543 2544 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2545 struct netlink_callback *cb, 2546 struct neigh_dump_filter *filter) 2547 { 2548 struct net *net = sock_net(skb->sk); 2549 struct neighbour *n; 2550 int rc, h, s_h = cb->args[1]; 2551 int idx, s_idx = idx = cb->args[2]; 2552 struct neigh_hash_table *nht; 2553 unsigned int flags = NLM_F_MULTI; 2554 2555 if (filter->dev_idx || filter->master_idx) 2556 flags |= NLM_F_DUMP_FILTERED; 2557 2558 rcu_read_lock_bh(); 2559 nht = rcu_dereference_bh(tbl->nht); 2560 2561 for (h = s_h; h < (1 << nht->hash_shift); h++) { 2562 if (h > s_h) 2563 s_idx = 0; 2564 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; 2565 n != NULL; 2566 n = rcu_dereference_bh(n->next)) { 2567 if (idx < s_idx || 
!net_eq(dev_net(n->dev), net)) 2568 goto next; 2569 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2570 neigh_master_filtered(n->dev, filter->master_idx)) 2571 goto next; 2572 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2573 cb->nlh->nlmsg_seq, 2574 RTM_NEWNEIGH, 2575 flags) < 0) { 2576 rc = -1; 2577 goto out; 2578 } 2579 next: 2580 idx++; 2581 } 2582 } 2583 rc = skb->len; 2584 out: 2585 rcu_read_unlock_bh(); 2586 cb->args[1] = h; 2587 cb->args[2] = idx; 2588 return rc; 2589 } 2590 2591 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2592 struct netlink_callback *cb, 2593 struct neigh_dump_filter *filter) 2594 { 2595 struct pneigh_entry *n; 2596 struct net *net = sock_net(skb->sk); 2597 int rc, h, s_h = cb->args[3]; 2598 int idx, s_idx = idx = cb->args[4]; 2599 unsigned int flags = NLM_F_MULTI; 2600 2601 if (filter->dev_idx || filter->master_idx) 2602 flags |= NLM_F_DUMP_FILTERED; 2603 2604 read_lock_bh(&tbl->lock); 2605 2606 for (h = s_h; h <= PNEIGH_HASHMASK; h++) { 2607 if (h > s_h) 2608 s_idx = 0; 2609 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { 2610 if (idx < s_idx || pneigh_net(n) != net) 2611 goto next; 2612 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2613 neigh_master_filtered(n->dev, filter->master_idx)) 2614 goto next; 2615 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2616 cb->nlh->nlmsg_seq, 2617 RTM_NEWNEIGH, flags, tbl) < 0) { 2618 read_unlock_bh(&tbl->lock); 2619 rc = -1; 2620 goto out; 2621 } 2622 next: 2623 idx++; 2624 } 2625 } 2626 2627 read_unlock_bh(&tbl->lock); 2628 rc = skb->len; 2629 out: 2630 cb->args[3] = h; 2631 cb->args[4] = idx; 2632 return rc; 2633 2634 } 2635 2636 static int neigh_valid_dump_req(const struct nlmsghdr *nlh, 2637 bool strict_check, 2638 struct neigh_dump_filter *filter, 2639 struct netlink_ext_ack *extack) 2640 { 2641 struct nlattr *tb[NDA_MAX + 1]; 2642 int err, i; 2643 2644 if (strict_check) { 2645 struct ndmsg *ndm; 2646 2647 if 
(nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2648 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); 2649 return -EINVAL; 2650 } 2651 2652 ndm = nlmsg_data(nlh); 2653 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || 2654 ndm->ndm_state || ndm->ndm_type) { 2655 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); 2656 return -EINVAL; 2657 } 2658 2659 if (ndm->ndm_flags & ~NTF_PROXY) { 2660 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); 2661 return -EINVAL; 2662 } 2663 2664 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), 2665 tb, NDA_MAX, nda_policy, 2666 extack); 2667 } else { 2668 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb, 2669 NDA_MAX, nda_policy, extack); 2670 } 2671 if (err < 0) 2672 return err; 2673 2674 for (i = 0; i <= NDA_MAX; ++i) { 2675 if (!tb[i]) 2676 continue; 2677 2678 /* all new attributes should require strict_check */ 2679 switch (i) { 2680 case NDA_IFINDEX: 2681 filter->dev_idx = nla_get_u32(tb[i]); 2682 break; 2683 case NDA_MASTER: 2684 filter->master_idx = nla_get_u32(tb[i]); 2685 break; 2686 default: 2687 if (strict_check) { 2688 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request"); 2689 return -EINVAL; 2690 } 2691 } 2692 } 2693 2694 return 0; 2695 } 2696 2697 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2698 { 2699 const struct nlmsghdr *nlh = cb->nlh; 2700 struct neigh_dump_filter filter = {}; 2701 struct neigh_table *tbl; 2702 int t, family, s_t; 2703 int proxy = 0; 2704 int err; 2705 2706 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2707 2708 /* check for full ndmsg structure presence, family member is 2709 * the same for both structures 2710 */ 2711 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && 2712 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) 2713 proxy = 1; 2714 2715 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); 2716 
if (err < 0 && cb->strict_check) 2717 return err; 2718 2719 s_t = cb->args[0]; 2720 2721 for (t = 0; t < NEIGH_NR_TABLES; t++) { 2722 tbl = neigh_tables[t]; 2723 2724 if (!tbl) 2725 continue; 2726 if (t < s_t || (family && tbl->family != family)) 2727 continue; 2728 if (t > s_t) 2729 memset(&cb->args[1], 0, sizeof(cb->args) - 2730 sizeof(cb->args[0])); 2731 if (proxy) 2732 err = pneigh_dump_table(tbl, skb, cb, &filter); 2733 else 2734 err = neigh_dump_table(tbl, skb, cb, &filter); 2735 if (err < 0) 2736 break; 2737 } 2738 2739 cb->args[0] = t; 2740 return skb->len; 2741 } 2742 2743 static int neigh_valid_get_req(const struct nlmsghdr *nlh, 2744 struct neigh_table **tbl, 2745 void **dst, int *dev_idx, u8 *ndm_flags, 2746 struct netlink_ext_ack *extack) 2747 { 2748 struct nlattr *tb[NDA_MAX + 1]; 2749 struct ndmsg *ndm; 2750 int err, i; 2751 2752 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2753 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request"); 2754 return -EINVAL; 2755 } 2756 2757 ndm = nlmsg_data(nlh); 2758 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || 2759 ndm->ndm_type) { 2760 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request"); 2761 return -EINVAL; 2762 } 2763 2764 if (ndm->ndm_flags & ~NTF_PROXY) { 2765 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request"); 2766 return -EINVAL; 2767 } 2768 2769 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, 2770 NDA_MAX, nda_policy, extack); 2771 if (err < 0) 2772 return err; 2773 2774 *ndm_flags = ndm->ndm_flags; 2775 *dev_idx = ndm->ndm_ifindex; 2776 *tbl = neigh_find_table(ndm->ndm_family); 2777 if (*tbl == NULL) { 2778 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request"); 2779 return -EAFNOSUPPORT; 2780 } 2781 2782 for (i = 0; i <= NDA_MAX; ++i) { 2783 if (!tb[i]) 2784 continue; 2785 2786 switch (i) { 2787 case NDA_DST: 2788 if (nla_len(tb[i]) != (int)(*tbl)->key_len) { 2789 NL_SET_ERR_MSG(extack, 
"Invalid network address in neighbor get request"); 2790 return -EINVAL; 2791 } 2792 *dst = nla_data(tb[i]); 2793 break; 2794 default: 2795 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request"); 2796 return -EINVAL; 2797 } 2798 } 2799 2800 return 0; 2801 } 2802 2803 static inline size_t neigh_nlmsg_size(void) 2804 { 2805 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2806 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2807 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ 2808 + nla_total_size(sizeof(struct nda_cacheinfo)) 2809 + nla_total_size(4) /* NDA_PROBES */ 2810 + nla_total_size(1); /* NDA_PROTOCOL */ 2811 } 2812 2813 static int neigh_get_reply(struct net *net, struct neighbour *neigh, 2814 u32 pid, u32 seq) 2815 { 2816 struct sk_buff *skb; 2817 int err = 0; 2818 2819 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL); 2820 if (!skb) 2821 return -ENOBUFS; 2822 2823 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0); 2824 if (err) { 2825 kfree_skb(skb); 2826 goto errout; 2827 } 2828 2829 err = rtnl_unicast(skb, net, pid); 2830 errout: 2831 return err; 2832 } 2833 2834 static inline size_t pneigh_nlmsg_size(void) 2835 { 2836 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2837 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2838 + nla_total_size(1); /* NDA_PROTOCOL */ 2839 } 2840 2841 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh, 2842 u32 pid, u32 seq, struct neigh_table *tbl) 2843 { 2844 struct sk_buff *skb; 2845 int err = 0; 2846 2847 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL); 2848 if (!skb) 2849 return -ENOBUFS; 2850 2851 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl); 2852 if (err) { 2853 kfree_skb(skb); 2854 goto errout; 2855 } 2856 2857 err = rtnl_unicast(skb, net, pid); 2858 errout: 2859 return err; 2860 } 2861 2862 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2863 struct netlink_ext_ack *extack) 2864 { 2865 struct net *net = sock_net(in_skb->sk); 2866 struct net_device 
*dev = NULL; 2867 struct neigh_table *tbl = NULL; 2868 struct neighbour *neigh; 2869 void *dst = NULL; 2870 u8 ndm_flags = 0; 2871 int dev_idx = 0; 2872 int err; 2873 2874 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags, 2875 extack); 2876 if (err < 0) 2877 return err; 2878 2879 if (dev_idx) { 2880 dev = __dev_get_by_index(net, dev_idx); 2881 if (!dev) { 2882 NL_SET_ERR_MSG(extack, "Unknown device ifindex"); 2883 return -ENODEV; 2884 } 2885 } 2886 2887 if (!dst) { 2888 NL_SET_ERR_MSG(extack, "Network address not specified"); 2889 return -EINVAL; 2890 } 2891 2892 if (ndm_flags & NTF_PROXY) { 2893 struct pneigh_entry *pn; 2894 2895 pn = pneigh_lookup(tbl, net, dst, dev, 0); 2896 if (!pn) { 2897 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found"); 2898 return -ENOENT; 2899 } 2900 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid, 2901 nlh->nlmsg_seq, tbl); 2902 } 2903 2904 if (!dev) { 2905 NL_SET_ERR_MSG(extack, "No device specified"); 2906 return -EINVAL; 2907 } 2908 2909 neigh = neigh_lookup(tbl, dst, dev); 2910 if (!neigh) { 2911 NL_SET_ERR_MSG(extack, "Neighbour entry not found"); 2912 return -ENOENT; 2913 } 2914 2915 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid, 2916 nlh->nlmsg_seq); 2917 2918 neigh_release(neigh); 2919 2920 return err; 2921 } 2922 2923 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2924 { 2925 int chain; 2926 struct neigh_hash_table *nht; 2927 2928 rcu_read_lock_bh(); 2929 nht = rcu_dereference_bh(tbl->nht); 2930 2931 read_lock(&tbl->lock); /* avoid resizes */ 2932 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2933 struct neighbour *n; 2934 2935 for (n = rcu_dereference_bh(nht->hash_buckets[chain]); 2936 n != NULL; 2937 n = rcu_dereference_bh(n->next)) 2938 cb(n, cookie); 2939 } 2940 read_unlock(&tbl->lock); 2941 rcu_read_unlock_bh(); 2942 } 2943 EXPORT_SYMBOL(neigh_for_each); 2944 2945 /* The tbl->lock must be held as a writer and BH 
disabled. */ 2946 void __neigh_for_each_release(struct neigh_table *tbl, 2947 int (*cb)(struct neighbour *)) 2948 { 2949 int chain; 2950 struct neigh_hash_table *nht; 2951 2952 nht = rcu_dereference_protected(tbl->nht, 2953 lockdep_is_held(&tbl->lock)); 2954 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2955 struct neighbour *n; 2956 struct neighbour __rcu **np; 2957 2958 np = &nht->hash_buckets[chain]; 2959 while ((n = rcu_dereference_protected(*np, 2960 lockdep_is_held(&tbl->lock))) != NULL) { 2961 int release; 2962 2963 write_lock(&n->lock); 2964 release = cb(n); 2965 if (release) { 2966 rcu_assign_pointer(*np, 2967 rcu_dereference_protected(n->next, 2968 lockdep_is_held(&tbl->lock))); 2969 neigh_mark_dead(n); 2970 } else 2971 np = &n->next; 2972 write_unlock(&n->lock); 2973 if (release) 2974 neigh_cleanup_and_release(n); 2975 } 2976 } 2977 } 2978 EXPORT_SYMBOL(__neigh_for_each_release); 2979 2980 int neigh_xmit(int index, struct net_device *dev, 2981 const void *addr, struct sk_buff *skb) 2982 { 2983 int err = -EAFNOSUPPORT; 2984 if (likely(index < NEIGH_NR_TABLES)) { 2985 struct neigh_table *tbl; 2986 struct neighbour *neigh; 2987 2988 tbl = neigh_tables[index]; 2989 if (!tbl) 2990 goto out; 2991 rcu_read_lock_bh(); 2992 if (index == NEIGH_ARP_TABLE) { 2993 u32 key = *((u32 *)addr); 2994 2995 neigh = __ipv4_neigh_lookup_noref(dev, key); 2996 } else { 2997 neigh = __neigh_lookup_noref(tbl, addr, dev); 2998 } 2999 if (!neigh) 3000 neigh = __neigh_create(tbl, addr, dev, false); 3001 err = PTR_ERR(neigh); 3002 if (IS_ERR(neigh)) { 3003 rcu_read_unlock_bh(); 3004 goto out_kfree_skb; 3005 } 3006 err = neigh->output(neigh, skb); 3007 rcu_read_unlock_bh(); 3008 } 3009 else if (index == NEIGH_LINK_TABLE) { 3010 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 3011 addr, NULL, skb->len); 3012 if (err < 0) 3013 goto out_kfree_skb; 3014 err = dev_queue_xmit(skb); 3015 } 3016 out: 3017 return err; 3018 out_kfree_skb: 3019 kfree_skb(skb); 3020 goto out; 
3021 } 3022 EXPORT_SYMBOL(neigh_xmit); 3023 3024 #ifdef CONFIG_PROC_FS 3025 3026 static struct neighbour *neigh_get_first(struct seq_file *seq) 3027 { 3028 struct neigh_seq_state *state = seq->private; 3029 struct net *net = seq_file_net(seq); 3030 struct neigh_hash_table *nht = state->nht; 3031 struct neighbour *n = NULL; 3032 int bucket = state->bucket; 3033 3034 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 3035 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { 3036 n = rcu_dereference_bh(nht->hash_buckets[bucket]); 3037 3038 while (n) { 3039 if (!net_eq(dev_net(n->dev), net)) 3040 goto next; 3041 if (state->neigh_sub_iter) { 3042 loff_t fakep = 0; 3043 void *v; 3044 3045 v = state->neigh_sub_iter(state, n, &fakep); 3046 if (!v) 3047 goto next; 3048 } 3049 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 3050 break; 3051 if (n->nud_state & ~NUD_NOARP) 3052 break; 3053 next: 3054 n = rcu_dereference_bh(n->next); 3055 } 3056 3057 if (n) 3058 break; 3059 } 3060 state->bucket = bucket; 3061 3062 return n; 3063 } 3064 3065 static struct neighbour *neigh_get_next(struct seq_file *seq, 3066 struct neighbour *n, 3067 loff_t *pos) 3068 { 3069 struct neigh_seq_state *state = seq->private; 3070 struct net *net = seq_file_net(seq); 3071 struct neigh_hash_table *nht = state->nht; 3072 3073 if (state->neigh_sub_iter) { 3074 void *v = state->neigh_sub_iter(state, n, pos); 3075 if (v) 3076 return n; 3077 } 3078 n = rcu_dereference_bh(n->next); 3079 3080 while (1) { 3081 while (n) { 3082 if (!net_eq(dev_net(n->dev), net)) 3083 goto next; 3084 if (state->neigh_sub_iter) { 3085 void *v = state->neigh_sub_iter(state, n, pos); 3086 if (v) 3087 return n; 3088 goto next; 3089 } 3090 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 3091 break; 3092 3093 if (n->nud_state & ~NUD_NOARP) 3094 break; 3095 next: 3096 n = rcu_dereference_bh(n->next); 3097 } 3098 3099 if (n) 3100 break; 3101 3102 if (++state->bucket >= (1 << nht->hash_shift)) 3103 break; 3104 3105 n = 
rcu_dereference_bh(nht->hash_buckets[state->bucket]); 3106 } 3107 3108 if (n && pos) 3109 --(*pos); 3110 return n; 3111 } 3112 3113 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) 3114 { 3115 struct neighbour *n = neigh_get_first(seq); 3116 3117 if (n) { 3118 --(*pos); 3119 while (*pos) { 3120 n = neigh_get_next(seq, n, pos); 3121 if (!n) 3122 break; 3123 } 3124 } 3125 return *pos ? NULL : n; 3126 } 3127 3128 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 3129 { 3130 struct neigh_seq_state *state = seq->private; 3131 struct net *net = seq_file_net(seq); 3132 struct neigh_table *tbl = state->tbl; 3133 struct pneigh_entry *pn = NULL; 3134 int bucket = state->bucket; 3135 3136 state->flags |= NEIGH_SEQ_IS_PNEIGH; 3137 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 3138 pn = tbl->phash_buckets[bucket]; 3139 while (pn && !net_eq(pneigh_net(pn), net)) 3140 pn = pn->next; 3141 if (pn) 3142 break; 3143 } 3144 state->bucket = bucket; 3145 3146 return pn; 3147 } 3148 3149 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, 3150 struct pneigh_entry *pn, 3151 loff_t *pos) 3152 { 3153 struct neigh_seq_state *state = seq->private; 3154 struct net *net = seq_file_net(seq); 3155 struct neigh_table *tbl = state->tbl; 3156 3157 do { 3158 pn = pn->next; 3159 } while (pn && !net_eq(pneigh_net(pn), net)); 3160 3161 while (!pn) { 3162 if (++state->bucket > PNEIGH_HASHMASK) 3163 break; 3164 pn = tbl->phash_buckets[state->bucket]; 3165 while (pn && !net_eq(pneigh_net(pn), net)) 3166 pn = pn->next; 3167 if (pn) 3168 break; 3169 } 3170 3171 if (pn && pos) 3172 --(*pos); 3173 3174 return pn; 3175 } 3176 3177 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) 3178 { 3179 struct pneigh_entry *pn = pneigh_get_first(seq); 3180 3181 if (pn) { 3182 --(*pos); 3183 while (*pos) { 3184 pn = pneigh_get_next(seq, pn, pos); 3185 if (!pn) 3186 break; 3187 } 3188 } 3189 return *pos ? 
NULL : pn; 3190 } 3191 3192 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) 3193 { 3194 struct neigh_seq_state *state = seq->private; 3195 void *rc; 3196 loff_t idxpos = *pos; 3197 3198 rc = neigh_get_idx(seq, &idxpos); 3199 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3200 rc = pneigh_get_idx(seq, &idxpos); 3201 3202 return rc; 3203 } 3204 3205 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 3206 __acquires(rcu_bh) 3207 { 3208 struct neigh_seq_state *state = seq->private; 3209 3210 state->tbl = tbl; 3211 state->bucket = 0; 3212 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 3213 3214 rcu_read_lock_bh(); 3215 state->nht = rcu_dereference_bh(tbl->nht); 3216 3217 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 3218 } 3219 EXPORT_SYMBOL(neigh_seq_start); 3220 3221 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3222 { 3223 struct neigh_seq_state *state; 3224 void *rc; 3225 3226 if (v == SEQ_START_TOKEN) { 3227 rc = neigh_get_first(seq); 3228 goto out; 3229 } 3230 3231 state = seq->private; 3232 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { 3233 rc = neigh_get_next(seq, v, NULL); 3234 if (rc) 3235 goto out; 3236 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3237 rc = pneigh_get_first(seq); 3238 } else { 3239 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); 3240 rc = pneigh_get_next(seq, v, NULL); 3241 } 3242 out: 3243 ++(*pos); 3244 return rc; 3245 } 3246 EXPORT_SYMBOL(neigh_seq_next); 3247 3248 void neigh_seq_stop(struct seq_file *seq, void *v) 3249 __releases(rcu_bh) 3250 { 3251 rcu_read_unlock_bh(); 3252 } 3253 EXPORT_SYMBOL(neigh_seq_stop); 3254 3255 /* statistics via seq_file */ 3256 3257 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 3258 { 3259 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3260 int cpu; 3261 3262 if (*pos == 0) 3263 return SEQ_START_TOKEN; 3264 3265 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) 
{ 3266 if (!cpu_possible(cpu)) 3267 continue; 3268 *pos = cpu+1; 3269 return per_cpu_ptr(tbl->stats, cpu); 3270 } 3271 return NULL; 3272 } 3273 3274 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3275 { 3276 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3277 int cpu; 3278 3279 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 3280 if (!cpu_possible(cpu)) 3281 continue; 3282 *pos = cpu+1; 3283 return per_cpu_ptr(tbl->stats, cpu); 3284 } 3285 return NULL; 3286 } 3287 3288 static void neigh_stat_seq_stop(struct seq_file *seq, void *v) 3289 { 3290 3291 } 3292 3293 static int neigh_stat_seq_show(struct seq_file *seq, void *v) 3294 { 3295 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3296 struct neigh_statistics *st = v; 3297 3298 if (v == SEQ_START_TOKEN) { 3299 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); 3300 return 0; 3301 } 3302 3303 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " 3304 "%08lx %08lx %08lx %08lx %08lx %08lx\n", 3305 atomic_read(&tbl->entries), 3306 3307 st->allocs, 3308 st->destroys, 3309 st->hash_grows, 3310 3311 st->lookups, 3312 st->hits, 3313 3314 st->res_failed, 3315 3316 st->rcv_probes_mcast, 3317 st->rcv_probes_ucast, 3318 3319 st->periodic_gc_runs, 3320 st->forced_gc_runs, 3321 st->unres_discards, 3322 st->table_fulls 3323 ); 3324 3325 return 0; 3326 } 3327 3328 static const struct seq_operations neigh_stat_seq_ops = { 3329 .start = neigh_stat_seq_start, 3330 .next = neigh_stat_seq_next, 3331 .stop = neigh_stat_seq_stop, 3332 .show = neigh_stat_seq_show, 3333 }; 3334 #endif /* CONFIG_PROC_FS */ 3335 3336 static void __neigh_notify(struct neighbour *n, int type, int flags, 3337 u32 pid) 3338 { 3339 struct net *net = dev_net(n->dev); 3340 struct sk_buff *skb; 3341 int err = -ENOBUFS; 3342 3343 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); 3344 if 
(skb == NULL) 3345 goto errout; 3346 3347 err = neigh_fill_info(skb, n, pid, 0, type, flags); 3348 if (err < 0) { 3349 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ 3350 WARN_ON(err == -EMSGSIZE); 3351 kfree_skb(skb); 3352 goto errout; 3353 } 3354 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 3355 return; 3356 errout: 3357 if (err < 0) 3358 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 3359 } 3360 3361 void neigh_app_ns(struct neighbour *n) 3362 { 3363 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); 3364 } 3365 EXPORT_SYMBOL(neigh_app_ns); 3366 3367 #ifdef CONFIG_SYSCTL 3368 static int zero; 3369 static int int_max = INT_MAX; 3370 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); 3371 3372 static int proc_unres_qlen(struct ctl_table *ctl, int write, 3373 void __user *buffer, size_t *lenp, loff_t *ppos) 3374 { 3375 int size, ret; 3376 struct ctl_table tmp = *ctl; 3377 3378 tmp.extra1 = &zero; 3379 tmp.extra2 = &unres_qlen_max; 3380 tmp.data = &size; 3381 3382 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); 3383 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3384 3385 if (write && !ret) 3386 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); 3387 return ret; 3388 } 3389 3390 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, 3391 int family) 3392 { 3393 switch (family) { 3394 case AF_INET: 3395 return __in_dev_arp_parms_get_rcu(dev); 3396 case AF_INET6: 3397 return __in6_dev_nd_parms_get_rcu(dev); 3398 } 3399 return NULL; 3400 } 3401 3402 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, 3403 int index) 3404 { 3405 struct net_device *dev; 3406 int family = neigh_parms_family(p); 3407 3408 rcu_read_lock(); 3409 for_each_netdev_rcu(net, dev) { 3410 struct neigh_parms *dst_p = 3411 neigh_get_dev_parms_rcu(dev, family); 3412 3413 if (dst_p && !test_bit(index, dst_p->data_state)) 3414 dst_p->data[index] = p->data[index]; 3415 } 3416 rcu_read_unlock(); 3417 } 3418 
3419 static void neigh_proc_update(struct ctl_table *ctl, int write) 3420 { 3421 struct net_device *dev = ctl->extra1; 3422 struct neigh_parms *p = ctl->extra2; 3423 struct net *net = neigh_parms_net(p); 3424 int index = (int *) ctl->data - p->data; 3425 3426 if (!write) 3427 return; 3428 3429 set_bit(index, p->data_state); 3430 if (index == NEIGH_VAR_DELAY_PROBE_TIME) 3431 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 3432 if (!dev) /* NULL dev means this is default value */ 3433 neigh_copy_dflt_parms(net, p, index); 3434 } 3435 3436 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, 3437 void __user *buffer, 3438 size_t *lenp, loff_t *ppos) 3439 { 3440 struct ctl_table tmp = *ctl; 3441 int ret; 3442 3443 tmp.extra1 = &zero; 3444 tmp.extra2 = &int_max; 3445 3446 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3447 neigh_proc_update(ctl, write); 3448 return ret; 3449 } 3450 3451 int neigh_proc_dointvec(struct ctl_table *ctl, int write, 3452 void __user *buffer, size_t *lenp, loff_t *ppos) 3453 { 3454 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 3455 3456 neigh_proc_update(ctl, write); 3457 return ret; 3458 } 3459 EXPORT_SYMBOL(neigh_proc_dointvec); 3460 3461 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, 3462 void __user *buffer, 3463 size_t *lenp, loff_t *ppos) 3464 { 3465 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3466 3467 neigh_proc_update(ctl, write); 3468 return ret; 3469 } 3470 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); 3471 3472 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, 3473 void __user *buffer, 3474 size_t *lenp, loff_t *ppos) 3475 { 3476 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); 3477 3478 neigh_proc_update(ctl, write); 3479 return ret; 3480 } 3481 3482 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, 3483 void __user *buffer, 3484 size_t *lenp, loff_t *ppos) 
3485 { 3486 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3487 3488 neigh_proc_update(ctl, write); 3489 return ret; 3490 } 3491 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); 3492 3493 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, 3494 void __user *buffer, 3495 size_t *lenp, loff_t *ppos) 3496 { 3497 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); 3498 3499 neigh_proc_update(ctl, write); 3500 return ret; 3501 } 3502 3503 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, 3504 void __user *buffer, 3505 size_t *lenp, loff_t *ppos) 3506 { 3507 struct neigh_parms *p = ctl->extra2; 3508 int ret; 3509 3510 if (strcmp(ctl->procname, "base_reachable_time") == 0) 3511 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3512 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) 3513 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3514 else 3515 ret = -1; 3516 3517 if (write && ret == 0) { 3518 /* update reachable_time as well, otherwise, the change will 3519 * only be effective after the next time neigh_periodic_work 3520 * decides to recompute it 3521 */ 3522 p->reachable_time = 3523 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 3524 } 3525 return ret; 3526 } 3527 3528 #define NEIGH_PARMS_DATA_OFFSET(index) \ 3529 (&((struct neigh_parms *) 0)->data[index]) 3530 3531 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ 3532 [NEIGH_VAR_ ## attr] = { \ 3533 .procname = name, \ 3534 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ 3535 .maxlen = sizeof(int), \ 3536 .mode = mval, \ 3537 .proc_handler = proc, \ 3538 } 3539 3540 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ 3541 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) 3542 3543 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ 3544 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) 3545 3546 #define 
/* Millisecond-valued entry stored in its own data slot. */
#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Millisecond-valued entry that is a second view on data_attr's slot. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Packet-count entry that is a second view on data_attr's slot. */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

/*
 * Template duplicated by neigh_sysctl_register() for every parms set.
 * The entries built with the NEIGH_SYSCTL_* macros are indexed by
 * NEIGH_VAR_*; the gc_* entries carry no .data here and get it assigned at
 * registration time.  The trailing empty entry terminates the table.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};

/*
 * Register the sysctl directory "net/<ipv4|ipv6>/neigh/<dev name|default>"
 * for parms set @p.
 *
 * @dev:     device the parms belong to, or NULL for the table defaults
 * @p:       parameter set to expose; its family must be AF_INET or
 *           AF_INET6 (anything else hits BUG())
 * @handler: optional replacement handler for the retrans_time and
 *           base_reachable_time entries (both units); NULL installs the
 *           built-in base_reachable_time handler for the latter two
 *
 * Returns 0 on success and -ENOBUFS when the table copy cannot be allocated
 * or the sysctl registration fails.  On success the copy is stored in
 * p->sysctl_table.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	/* Private copy of the template for this parms set. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/*
	 * Per-parms entries: the template's .data is an offset into
	 * struct neigh_parms, so adding p turns it into a real pointer.
	 * extra1/extra2 carry dev and p for the handlers.
	 */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		/* Defaults directory: also expose the table-wide gc knobs. */
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	/* NOTE(review): a NULL procname in slot 0 presumably makes the whole
	 * table look empty to the sysctl core - confirm.
	 */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
unregister_net_sysctl_table(t->sysctl_header); 3708 kfree(t); 3709 } 3710 } 3711 EXPORT_SYMBOL(neigh_sysctl_unregister); 3712 3713 #endif /* CONFIG_SYSCTL */ 3714 3715 static int __init neigh_init(void) 3716 { 3717 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); 3718 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); 3719 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0); 3720 3721 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 3722 0); 3723 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); 3724 3725 return 0; 3726 } 3727 3728 subsys_initcall(neigh_init); 3729