// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - Nothing clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so will deadlock if the backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity
   state. However, the same lock is used to protect other entry fields:
	- timer
	- resolution queue

   Again, nothing clever should be done under neigh->lock; the most
   complicated operation we allow is dev->hard_header. dev->hard_header
   is assumed to be simple and not to call back into neighbour tables.
 */
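/* A minimal sketch of the pattern described above (the helper names are
 * illustrative, not part of this file): scan under tbl->lock, take a
 * reference, drop the lock, and only then do the heavy work.
 *
 *	read_lock_bh(&tbl->lock);
 *	n = find_entry(tbl, key);	// hypothetical bucket lookup
 *	if (n)
 *		neigh_hold(n);		// pin the entry before unlocking
 *	read_unlock_bh(&tbl->lock);
 *	if (n) {
 *		do_slow_work(n);	// e.g. send a packet; never done
 *					// while tbl->lock is held
 *		neigh_release(n);
 *	}
 */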
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * Returns a value uniformly distributed over the interval
 * (1/2)*base ... (3/2)*base. This matches the default IPv6 behaviour
 * and is deliberately not overridable, because it is a reasonable
 * choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
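/* Example (assuming the usual 30 second default for base_reachable_time):
 * the call below yields a jiffies value spread uniformly over roughly
 * 15..45 seconds, so entries do not all expire in lockstep:
 *
 *	reach = neigh_rand_reach_time(30 * HZ);	// ~15*HZ .. ~45*HZ
 */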
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation:
				   we must destroy the neighbour
				   entry, but someone still holds a
				   reference to it.

				   Destruction is deferred until the
				   last user releases the entry, but
				   we must stop the timers etc. and
				   move it to a safe state now.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
				     struct net *net, const void *pkey,
				     struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	n->protocol = 0;
	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/* neighbour must already be out of the table */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
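/* How the two helpers above interact (an illustrative sketch, not extra
 * behaviour): while an entry is in a connected state, neigh->output points
 * at ops->connected_output and frames go out with the cached link-layer
 * header; once the state becomes suspect, neigh->output is switched back
 * to ops->output (e.g. neigh_resolve_output()), which revalidates the
 * entry before each transmit:
 *
 *	neigh_connect(n);	// n->output == n->ops->connected_output
 *	n->output(n, skb);	// fast: copy cached ll header, xmit
 *	neigh_suspect(n);	// n->output == n->ops->output
 *	n->output(n, skb);	// slow: may queue the skb and re-resolve
 */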
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
					   NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate spot: error_report is a complicated
	   routine and, in particular, it may touch this very same
	   neighbour entry. So we are careful here to avoid an
	   endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
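/* Summary of the transitions driven by the handler above (our reading of
 * the code, not an authoritative state chart):
 *
 *	REACHABLE --(reachable_time expired, recently used)--> DELAY
 *	REACHABLE --(reachable_time expired, idle)-----------> STALE
 *	DELAY     --(confirmation arrived in time)-----------> REACHABLE
 *	DELAY     --(no confirmation)------------------------> PROBE
 *	PROBE/INCOMPLETE --(neigh_max_probes() exceeded)-----> FAILED
 *
 * The timer is never re-armed sooner than HZ/2 ahead, which bounds the
 * timer churn per entry.
 */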
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}


/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE marks an existing "connected"
				entry suspect instead of overriding
				its lladdr when the new one differs.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				to be a router.

   The caller MUST hold a reference count on the entry.
 */
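/* Example (this mirrors the administrative delete path in neigh_delete()
 * later in this file): an admin-initiated update that forces an entry to
 * NUD_FAILED regardless of its current lladdr combines OVERRIDE with ADMIN:
 *
 *	__neigh_update(neigh, NULL, NUD_FAILED,
 *		       NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
 *		       0, NULL);
 *
 * A passive confirmation (e.g. from a received ARP reply) would instead
 * pass the observed lladdr with a NUD_STALE/NUD_REACHABLE state and weaker
 * override semantics.
 */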
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		goto out;
	}

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * receive an ARP packet, even if it doesn't change the IP-to-MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address is not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}
	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);

	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);
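/* Both output paths above read neigh->ha under the ha_lock seqlock: the
 * read side takes no lock and simply retries if a writer (see the memcpy
 * in __neigh_update()) changed the address mid-copy. For reference, the
 * generic read-side pattern is:
 *
 *	unsigned int seq;
 *
 *	do {
 *		seq = read_seqbegin(&neigh->ha_lock);
 *		// ... copy neigh->ha somewhere ...
 *	} while (read_seqretry(&neigh->ha_lock, seq));
 */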
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
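/* Typical registration, as done by the protocol module that owns a table
 * (net/ipv4/arp.c registers arp_tbl roughly this way; sketch shown for
 * illustration only):
 *
 *	void __init arp_init(void)
 *	{
 *		neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
 *		// ... register netdev notifiers, proc entries, sysctls ...
 *	}
 *
 * The index must be one of the NEIGH_*_TABLE constants so that
 * neigh_find_table() below can map an address family to the table.
 */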
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}
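/* For reference, the two handlers above back the RTM_DELNEIGH and
 * RTM_NEWNEIGH netlink messages; from userspace they are typically
 * exercised via iproute2 (illustrative commands, not part of this file):
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0
 *	ip neigh del 192.0.2.1 dev eth0
 *
 * "replace" sets NLM_F_CREATE|NLM_F_REPLACE, so neigh_add() keeps the
 * OVERRIDE flags; a plain "add" without NLM_F_REPLACE drops them.
 */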
static int neightbl_fill_parms(struct sk_buff *skb,
			       struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;
	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

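/*
 * RTM_SETNEIGHTBL handler: updates per-device parameters (NDTA_PARMS)
 * and, in the init namespace only, the table-wide gc_interval and
 * gc_thresh* knobs. This is the message that e.g. iproute2's
 * "ip ntable change name arp_cache thresh1 256" ends up sending.
 */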
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}

static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

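/*
 * Build one RTM_NEWNEIGH message for a neighbour entry: the ndmsg
 * header, NDA_DST (the primary key), NDA_LLADDR when the entry is in
 * a NUD_VALID state, plus NDA_CACHEINFO, NDA_PROBES and, when set,
 * NDA_PROTOCOL.
 */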
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}

static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH, flags, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}

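/*
 * Validate a dump request. With strict checking (the requesting socket
 * has NETLINK_GET_STRICT_CHK set) only a clean ndmsg header with at
 * most NTF_PROXY in ndm_flags is accepted, and NDA_IFINDEX/NDA_MASTER
 * are the only attributes allowed; they feed the dump filter.
 */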
static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int err, i;

	if (strict_check) {
		struct ndmsg *ndm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	} else {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}

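/*
 * Validate an RTM_GETNEIGH request for a single entry: a clean ndmsg
 * header (only NTF_PROXY may be set in ndm_flags), an address family
 * with a registered table, and at most one NDA_DST attribute whose
 * length matches the table's key length.
 */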
static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
					    NDA_MAX, nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -EINVAL;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

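/*
 * Walk every neighbour in @tbl and invoke @cb on it. The walk runs
 * under rcu_read_lock_bh() plus tbl->lock taken as a reader (to fend
 * off resizes), so @cb must not sleep and must not take tbl->lock.
 *
 * Minimal usage sketch (hypothetical caller), counting the entries of
 * the ARP table:
 *
 *	static void count_cb(struct neighbour *n, void *cookie)
 *	{
 *		(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, count_cb, &count);
 */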
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		if (index == NEIGH_ARP_TABLE) {
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	} else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

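/*
 * The /proc seq_file iterators below walk the table under
 * rcu_read_lock_bh() plus tbl->lock taken as a reader (both acquired
 * in neigh_seq_start()): first the neighbour hash buckets, then,
 * unless NEIGH_SEQ_NEIGH_ONLY is set, the proxy (pneigh) buckets.
 */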
#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

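/* Proxy (NTF_PROXY) entries live in tbl->phash_buckets[], a fixed
 * array of PNEIGH_HASHMASK + 1 chains protected by tbl->lock rather
 * than RCU, hence the plain pointer walks below.
 */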
static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);
	read_lock(&tbl->lock);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

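/*
 * Iterator convention for the per-CPU statistics (surfaced e.g. as
 * /proc/net/stat/arp_cache): *pos == 0 yields the header line
 * (SEQ_START_TOKEN); otherwise *pos is 1 + the last CPU visited, so
 * the walk resumes at the next possible CPU.
 */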
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
#endif /* CONFIG_PROC_FS */

static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int zero;
static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

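/*
 * The handlers below wrap the generic proc_dointvec*() helpers; each
 * one funnels through neigh_proc_update() on write so that the
 * per-field bit in p->data_state is set and, for the default parms,
 * the new value is propagated to devices that have not overridden it.
 */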
static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = &zero;
	tmp.extra2 = &int_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

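/*
 * The template below is cloned for each (family, device) pair by
 * neigh_sysctl_register(). The macros fill one ctl_table slot each;
 * .data is stored as an offset into struct neigh_parms and fixed up
 * to a real pointer at registration time.
 */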
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};

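/*
 * Register the sysctl tree for one neigh_parms instance, e.g.
 * net/ipv4/neigh/eth0/ or net/ipv6/neigh/default/. For per-device
 * tables the GC entries are cut off, since garbage collection is a
 * per-table rather than per-device matter.
 */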
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);