// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     It would result in deadlocks if the backend/driver wanted to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
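/* Illustrative sketch (not part of the original file): the canonical way
 * to do non-trivial work on an entry found during a table scan is to pin
 * it under tbl->lock, drop the lock, and only then call out:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <entry found in a hash bucket>;
 *	neigh_hold(n);			 (pin the entry)
 *	write_unlock_bh(&tbl->lock);	 (never call drivers under tbl->lock)
 *	<send probes, call backends, etc.>
 *	neigh_release(n);		 (drop the pin when done)
 */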
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
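/* Worked example (illustrative, not part of the original file): with
 * base = 30 * HZ, prandom_u32() % base is uniform in [0, 30s) and
 * base >> 1 adds a fixed 15s, so the result is uniform in [15s, 45s),
 * i.e. [base/2, 3*base/2) as the comment above describes.
 */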
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
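/* Illustrative note (not part of the original file): forced GC only evicts
 * entries that nothing else references, and at most
 *
 *	max_clean = gc_entries - gc_thresh2
 *
 * of them per run. E.g. with gc_thresh2 = 512 and 600 gc-eligible entries,
 * one run removes at most 88 unreferenced entries that are NUD_FAILED,
 * multicast, or not updated within the last 5 seconds.
 */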
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
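/* Sizing note (illustrative, not part of the original file): with
 * shift = 3 the table has 1 << 3 = 8 buckets, i.e. 64 bytes of pointers
 * on 64-bit, comfortably inside one page and served by kzalloc(); once
 * (1 << shift) * sizeof(void *) outgrows PAGE_SIZE, the buckets come from
 * __get_free_pages() instead, which is why both paths exist above.
 */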
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
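/* Usage sketch (illustrative, not part of the original file): callers get
 * back a referenced entry (or NULL) and must balance the lookup with
 * neigh_release(); e.g. for a hypothetical IPv4 next hop "gw" on "dev":
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &gw, dev);
 *
 *	if (n) {
 *		<use n->ha, n->nud_state, ...>
 *		neigh_release(n);	 (drop the reference from the lookup)
 *	}
 */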
static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
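/* Worked example (illustrative, not part of the original file): for the
 * IPv4 key 192.0.2.1, pneigh_hash() reads the last four key bytes as a
 * native-endian u32 and folds them down with xor shifts:
 *
 *	h = 0xc0000201		(big-endian reading; byte order only changes
 *				 which bucket is picked, not the spread)
 *	h ^= h >> 16;  h ^= h >> 8;  h ^= h >> 4;
 *	h &= PNEIGH_HASHMASK;	(final 4-bit bucket index, 0..15)
 */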
struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	n->protocol = 0;
	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 * The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
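/* Design note (illustrative, not part of the original file): the "fast
 * path" toggle is just a function pointer swap. While an entry is
 * NUD_CONNECTED, n->output points at ops->connected_output, which writes
 * the cached link-layer header without revalidating the entry; any doubt
 * about the entry flips it back to ops->output (neigh_resolve_output in
 * the ARP case), which re-runs the resolution state machine per packet.
 */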
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. report_unreachable is a very
	   complicated routine. In particular, it can hit the same
	   neighbour entry!

	   So we try to be careful and avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
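/* State-machine summary (illustrative, not part of the original file):
 *
 *	NUD_REACHABLE --(confirmed too old)---------> NUD_DELAY or NUD_STALE
 *	NUD_DELAY     --(confirmed again in time)---> NUD_REACHABLE
 *	NUD_DELAY     --(delay expired)-------------> NUD_PROBE
 *	NUD_PROBE / NUD_INCOMPLETE --(probes exhausted)--> NUD_FAILED
 *
 * Every re-armed timer is clamped to fire no sooner than HZ/100 from now,
 * so a bogus RETRANS_TIME of 0 cannot busy-loop the handler.
 */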
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
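/* Accounting note (illustrative, not part of the original file): the
 * unresolved queue above is bounded in bytes of truesize, not in packets.
 * E.g. if QUEUE_LEN_BYTES were 64 KiB and each full-size frame carried
 * roughly 2 KiB of truesize, about 32 packets would fit; older packets
 * are dropped from the head (counted as unres_discards) to admit new
 * ones while resolution is still in flight.
 */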
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}



/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold a reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk moving the locktime window with
	 * noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
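/* Usage sketch (illustrative, not part of the original file): a protocol
 * that has just validated a reply from the neighbour would confirm the
 * entry and (possibly) update its link-layer address like this:
 *
 *	neigh_update(n, new_lladdr, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * whereas an unsolicited advertisement would typically pass NUD_STALE
 * with NEIGH_UPDATE_F_WEAK_OVERRIDE, so a connected entry with a
 * different cached lladdr is merely suspected rather than rewritten.
 */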
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
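/* Pattern note (illustrative, not part of the original file): the
 * read_seqbegin()/read_seqretry() loops above are the standard lockless
 * seqlock read side. The writer bumps the sequence around the memcpy()
 * into neigh->ha (see __neigh_update()); a reader that raced with the
 * writer observes a changed sequence and simply rebuilds the header:
 *
 *	do {
 *		seq = read_seqbegin(&neigh->ha_lock);
 *		<copy neigh->ha into the frame header>
 *	} while (read_seqretry(&neigh->ha_lock, seq));
 */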
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = jiffies +
			prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	neigh_release(neigh);

out:
	return err;
}
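/* Flag-mapping note (illustrative, not part of the original file): the
 * netlink message flags translate onto __neigh_update() flags roughly as
 *
 *	NLM_F_CREATE|NLM_F_EXCL    -> -EEXIST if the entry already exists
 *	NLM_F_CREATE|NLM_F_REPLACE -> keeps NEIGH_UPDATE_F_OVERRIDE
 *	no NLM_F_CREATE            -> -ENOENT if the entry does not exist
 *
 * and an update without NLM_F_REPLACE drops the OVERRIDE bits above, so
 * it cannot clobber a different cached lladdr.
 */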
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
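/* Worked example (illustrative, not part of the original file): the
 * deprecated NDTPA_QUEUE_LEN is reported in packets, approximated as
 *
 *	QUEUE_LEN_BYTES / SKB_TRUESIZE(ETH_FRAME_LEN)
 *
 * i.e. the byte budget divided by the true size (data plus sk_buff
 * overhead) of one full-length Ethernet frame, mirroring the reverse
 * conversion neightbl_set() applies when userspace still writes
 * NDTPA_QUEUE_LEN.
 */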
tbl->id) < 0 || 2146 neightbl_fill_parms(skb, parms) < 0) 2147 goto errout; 2148 2149 read_unlock_bh(&tbl->lock); 2150 nlmsg_end(skb, nlh); 2151 return 0; 2152 errout: 2153 read_unlock_bh(&tbl->lock); 2154 nlmsg_cancel(skb, nlh); 2155 return -EMSGSIZE; 2156 } 2157 2158 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { 2159 [NDTA_NAME] = { .type = NLA_STRING }, 2160 [NDTA_THRESH1] = { .type = NLA_U32 }, 2161 [NDTA_THRESH2] = { .type = NLA_U32 }, 2162 [NDTA_THRESH3] = { .type = NLA_U32 }, 2163 [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, 2164 [NDTA_PARMS] = { .type = NLA_NESTED }, 2165 }; 2166 2167 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { 2168 [NDTPA_IFINDEX] = { .type = NLA_U32 }, 2169 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, 2170 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, 2171 [NDTPA_APP_PROBES] = { .type = NLA_U32 }, 2172 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, 2173 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, 2174 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, 2175 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, 2176 [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, 2177 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, 2178 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, 2179 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, 2180 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, 2181 [NDTPA_LOCKTIME] = { .type = NLA_U64 }, 2182 }; 2183 2184 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, 2185 struct netlink_ext_ack *extack) 2186 { 2187 struct net *net = sock_net(skb->sk); 2188 struct neigh_table *tbl; 2189 struct ndtmsg *ndtmsg; 2190 struct nlattr *tb[NDTA_MAX+1]; 2191 bool found = false; 2192 int err, tidx; 2193 2194 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, 2195 nl_neightbl_policy, extack); 2196 if (err < 0) 2197 goto errout; 2198 2199 if (tb[NDTA_NAME] == NULL) { 2200 err = -EINVAL; 2201 goto errout; 2202 } 2203 2204 ndtmsg = nlmsg_data(nlh); 2205 2206 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { 2207 tbl = neigh_tables[tidx]; 2208 if (!tbl) 2209 continue; 2210 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) 2211 continue; 2212 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { 2213 found = true; 2214 break; 2215 } 2216 } 2217 2218 if (!found) 2219 return -ENOENT; 2220 2221 /* 2222 * We acquire tbl->lock to be nice to the periodic timers and 2223 * make sure they always see a consistent set of values. 
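	 * (Readers such as neightbl_fill_info() take this same lock with
	 * read_lock_bh(), so a dump never observes a half-applied update.)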
2224 */ 2225 write_lock_bh(&tbl->lock); 2226 2227 if (tb[NDTA_PARMS]) { 2228 struct nlattr *tbp[NDTPA_MAX+1]; 2229 struct neigh_parms *p; 2230 int i, ifindex = 0; 2231 2232 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX, 2233 tb[NDTA_PARMS], 2234 nl_ntbl_parm_policy, extack); 2235 if (err < 0) 2236 goto errout_tbl_lock; 2237 2238 if (tbp[NDTPA_IFINDEX]) 2239 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); 2240 2241 p = lookup_neigh_parms(tbl, net, ifindex); 2242 if (p == NULL) { 2243 err = -ENOENT; 2244 goto errout_tbl_lock; 2245 } 2246 2247 for (i = 1; i <= NDTPA_MAX; i++) { 2248 if (tbp[i] == NULL) 2249 continue; 2250 2251 switch (i) { 2252 case NDTPA_QUEUE_LEN: 2253 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, 2254 nla_get_u32(tbp[i]) * 2255 SKB_TRUESIZE(ETH_FRAME_LEN)); 2256 break; 2257 case NDTPA_QUEUE_LENBYTES: 2258 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, 2259 nla_get_u32(tbp[i])); 2260 break; 2261 case NDTPA_PROXY_QLEN: 2262 NEIGH_VAR_SET(p, PROXY_QLEN, 2263 nla_get_u32(tbp[i])); 2264 break; 2265 case NDTPA_APP_PROBES: 2266 NEIGH_VAR_SET(p, APP_PROBES, 2267 nla_get_u32(tbp[i])); 2268 break; 2269 case NDTPA_UCAST_PROBES: 2270 NEIGH_VAR_SET(p, UCAST_PROBES, 2271 nla_get_u32(tbp[i])); 2272 break; 2273 case NDTPA_MCAST_PROBES: 2274 NEIGH_VAR_SET(p, MCAST_PROBES, 2275 nla_get_u32(tbp[i])); 2276 break; 2277 case NDTPA_MCAST_REPROBES: 2278 NEIGH_VAR_SET(p, MCAST_REPROBES, 2279 nla_get_u32(tbp[i])); 2280 break; 2281 case NDTPA_BASE_REACHABLE_TIME: 2282 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, 2283 nla_get_msecs(tbp[i])); 2284 /* update reachable_time as well, otherwise, the change will 2285 * only be effective after the next time neigh_periodic_work 2286 * decides to recompute it (can be multiple minutes) 2287 */ 2288 p->reachable_time = 2289 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 2290 break; 2291 case NDTPA_GC_STALETIME: 2292 NEIGH_VAR_SET(p, GC_STALETIME, 2293 nla_get_msecs(tbp[i])); 2294 break; 2295 case NDTPA_DELAY_PROBE_TIME: 2296 NEIGH_VAR_SET(p, DELAY_PROBE_TIME, 2297 nla_get_msecs(tbp[i])); 2298 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 2299 break; 2300 case NDTPA_RETRANS_TIME: 2301 NEIGH_VAR_SET(p, RETRANS_TIME, 2302 nla_get_msecs(tbp[i])); 2303 break; 2304 case NDTPA_ANYCAST_DELAY: 2305 NEIGH_VAR_SET(p, ANYCAST_DELAY, 2306 nla_get_msecs(tbp[i])); 2307 break; 2308 case NDTPA_PROXY_DELAY: 2309 NEIGH_VAR_SET(p, PROXY_DELAY, 2310 nla_get_msecs(tbp[i])); 2311 break; 2312 case NDTPA_LOCKTIME: 2313 NEIGH_VAR_SET(p, LOCKTIME, 2314 nla_get_msecs(tbp[i])); 2315 break; 2316 } 2317 } 2318 } 2319 2320 err = -ENOENT; 2321 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || 2322 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && 2323 !net_eq(net, &init_net)) 2324 goto errout_tbl_lock; 2325 2326 if (tb[NDTA_THRESH1]) 2327 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); 2328 2329 if (tb[NDTA_THRESH2]) 2330 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); 2331 2332 if (tb[NDTA_THRESH3]) 2333 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); 2334 2335 if (tb[NDTA_GC_INTERVAL]) 2336 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); 2337 2338 err = 0; 2339 2340 errout_tbl_lock: 2341 write_unlock_bh(&tbl->lock); 2342 errout: 2343 return err; 2344 } 2345 2346 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh, 2347 struct netlink_ext_ack *extack) 2348 { 2349 struct ndtmsg *ndtm; 2350 2351 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) { 2352 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request"); 2353 return -EINVAL; 2354 } 2355 2356 ndtm = nlmsg_data(nlh); 
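	/* Strict checking rejects nonzero pad bytes: they are reserved, and
	 * keeping them zero-only preserves the option of turning them into
	 * real fields later.
	 */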
2357 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) { 2358 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request"); 2359 return -EINVAL; 2360 } 2361 2362 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) { 2363 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request"); 2364 return -EINVAL; 2365 } 2366 2367 return 0; 2368 } 2369 2370 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2371 { 2372 const struct nlmsghdr *nlh = cb->nlh; 2373 struct net *net = sock_net(skb->sk); 2374 int family, tidx, nidx = 0; 2375 int tbl_skip = cb->args[0]; 2376 int neigh_skip = cb->args[1]; 2377 struct neigh_table *tbl; 2378 2379 if (cb->strict_check) { 2380 int err = neightbl_valid_dump_info(nlh, cb->extack); 2381 2382 if (err < 0) 2383 return err; 2384 } 2385 2386 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2387 2388 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { 2389 struct neigh_parms *p; 2390 2391 tbl = neigh_tables[tidx]; 2392 if (!tbl) 2393 continue; 2394 2395 if (tidx < tbl_skip || (family && tbl->family != family)) 2396 continue; 2397 2398 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, 2399 nlh->nlmsg_seq, RTM_NEWNEIGHTBL, 2400 NLM_F_MULTI) < 0) 2401 break; 2402 2403 nidx = 0; 2404 p = list_next_entry(&tbl->parms, list); 2405 list_for_each_entry_from(p, &tbl->parms_list, list) { 2406 if (!net_eq(neigh_parms_net(p), net)) 2407 continue; 2408 2409 if (nidx < neigh_skip) 2410 goto next; 2411 2412 if (neightbl_fill_param_info(skb, tbl, p, 2413 NETLINK_CB(cb->skb).portid, 2414 nlh->nlmsg_seq, 2415 RTM_NEWNEIGHTBL, 2416 NLM_F_MULTI) < 0) 2417 goto out; 2418 next: 2419 nidx++; 2420 } 2421 2422 neigh_skip = 0; 2423 } 2424 out: 2425 cb->args[0] = tidx; 2426 cb->args[1] = nidx; 2427 2428 return skb->len; 2429 } 2430 2431 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, 2432 u32 pid, u32 seq, int type, unsigned int flags) 2433 { 2434 unsigned long now = jiffies; 2435 struct nda_cacheinfo ci; 2436 struct nlmsghdr *nlh; 2437 struct ndmsg *ndm; 2438 2439 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2440 if (nlh == NULL) 2441 return -EMSGSIZE; 2442 2443 ndm = nlmsg_data(nlh); 2444 ndm->ndm_family = neigh->ops->family; 2445 ndm->ndm_pad1 = 0; 2446 ndm->ndm_pad2 = 0; 2447 ndm->ndm_flags = neigh->flags; 2448 ndm->ndm_type = neigh->type; 2449 ndm->ndm_ifindex = neigh->dev->ifindex; 2450 2451 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) 2452 goto nla_put_failure; 2453 2454 read_lock_bh(&neigh->lock); 2455 ndm->ndm_state = neigh->nud_state; 2456 if (neigh->nud_state & NUD_VALID) { 2457 char haddr[MAX_ADDR_LEN]; 2458 2459 neigh_ha_snapshot(haddr, neigh, neigh->dev); 2460 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { 2461 read_unlock_bh(&neigh->lock); 2462 goto nla_put_failure; 2463 } 2464 } 2465 2466 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2467 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); 2468 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); 2469 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; 2470 read_unlock_bh(&neigh->lock); 2471 2472 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || 2473 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 2474 goto nla_put_failure; 2475 2476 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) 2477 goto nla_put_failure; 2478 2479 nlmsg_end(skb, nlh); 2480 return 0; 2481 2482 nla_put_failure: 2483 nlmsg_cancel(skb, nlh); 2484 return 
-EMSGSIZE; 2485 } 2486 2487 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, 2488 u32 pid, u32 seq, int type, unsigned int flags, 2489 struct neigh_table *tbl) 2490 { 2491 struct nlmsghdr *nlh; 2492 struct ndmsg *ndm; 2493 2494 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2495 if (nlh == NULL) 2496 return -EMSGSIZE; 2497 2498 ndm = nlmsg_data(nlh); 2499 ndm->ndm_family = tbl->family; 2500 ndm->ndm_pad1 = 0; 2501 ndm->ndm_pad2 = 0; 2502 ndm->ndm_flags = pn->flags | NTF_PROXY; 2503 ndm->ndm_type = RTN_UNICAST; 2504 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; 2505 ndm->ndm_state = NUD_NONE; 2506 2507 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) 2508 goto nla_put_failure; 2509 2510 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) 2511 goto nla_put_failure; 2512 2513 nlmsg_end(skb, nlh); 2514 return 0; 2515 2516 nla_put_failure: 2517 nlmsg_cancel(skb, nlh); 2518 return -EMSGSIZE; 2519 } 2520 2521 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) 2522 { 2523 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 2524 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); 2525 } 2526 2527 static bool neigh_master_filtered(struct net_device *dev, int master_idx) 2528 { 2529 struct net_device *master; 2530 2531 if (!master_idx) 2532 return false; 2533 2534 master = dev ? netdev_master_upper_dev_get(dev) : NULL; 2535 if (!master || master->ifindex != master_idx) 2536 return true; 2537 2538 return false; 2539 } 2540 2541 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) 2542 { 2543 if (filter_idx && (!dev || dev->ifindex != filter_idx)) 2544 return true; 2545 2546 return false; 2547 } 2548 2549 struct neigh_dump_filter { 2550 int master_idx; 2551 int dev_idx; 2552 }; 2553 2554 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2555 struct netlink_callback *cb, 2556 struct neigh_dump_filter *filter) 2557 { 2558 struct net *net = sock_net(skb->sk); 2559 struct neighbour *n; 2560 int rc, h, s_h = cb->args[1]; 2561 int idx, s_idx = idx = cb->args[2]; 2562 struct neigh_hash_table *nht; 2563 unsigned int flags = NLM_F_MULTI; 2564 2565 if (filter->dev_idx || filter->master_idx) 2566 flags |= NLM_F_DUMP_FILTERED; 2567 2568 rcu_read_lock_bh(); 2569 nht = rcu_dereference_bh(tbl->nht); 2570 2571 for (h = s_h; h < (1 << nht->hash_shift); h++) { 2572 if (h > s_h) 2573 s_idx = 0; 2574 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; 2575 n != NULL; 2576 n = rcu_dereference_bh(n->next)) { 2577 if (idx < s_idx || !net_eq(dev_net(n->dev), net)) 2578 goto next; 2579 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2580 neigh_master_filtered(n->dev, filter->master_idx)) 2581 goto next; 2582 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2583 cb->nlh->nlmsg_seq, 2584 RTM_NEWNEIGH, 2585 flags) < 0) { 2586 rc = -1; 2587 goto out; 2588 } 2589 next: 2590 idx++; 2591 } 2592 } 2593 rc = skb->len; 2594 out: 2595 rcu_read_unlock_bh(); 2596 cb->args[1] = h; 2597 cb->args[2] = idx; 2598 return rc; 2599 } 2600 2601 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2602 struct netlink_callback *cb, 2603 struct neigh_dump_filter *filter) 2604 { 2605 struct pneigh_entry *n; 2606 struct net *net = sock_net(skb->sk); 2607 int rc, h, s_h = cb->args[3]; 2608 int idx, s_idx = idx = cb->args[4]; 2609 unsigned int flags = NLM_F_MULTI; 2610 2611 if (filter->dev_idx || filter->master_idx) 2612 flags |= NLM_F_DUMP_FILTERED; 2613 2614 read_lock_bh(&tbl->lock); 
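	/* Proxy entries sit in a small fixed-size hash (PNEIGH_HASHMASK + 1
	 * buckets) protected by tbl->lock rather than RCU, hence the read
	 * lock held across the whole walk.
	 */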
2615 2616 for (h = s_h; h <= PNEIGH_HASHMASK; h++) { 2617 if (h > s_h) 2618 s_idx = 0; 2619 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { 2620 if (idx < s_idx || pneigh_net(n) != net) 2621 goto next; 2622 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2623 neigh_master_filtered(n->dev, filter->master_idx)) 2624 goto next; 2625 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2626 cb->nlh->nlmsg_seq, 2627 RTM_NEWNEIGH, flags, tbl) < 0) { 2628 read_unlock_bh(&tbl->lock); 2629 rc = -1; 2630 goto out; 2631 } 2632 next: 2633 idx++; 2634 } 2635 } 2636 2637 read_unlock_bh(&tbl->lock); 2638 rc = skb->len; 2639 out: 2640 cb->args[3] = h; 2641 cb->args[4] = idx; 2642 return rc; 2643 2644 } 2645 2646 static int neigh_valid_dump_req(const struct nlmsghdr *nlh, 2647 bool strict_check, 2648 struct neigh_dump_filter *filter, 2649 struct netlink_ext_ack *extack) 2650 { 2651 struct nlattr *tb[NDA_MAX + 1]; 2652 int err, i; 2653 2654 if (strict_check) { 2655 struct ndmsg *ndm; 2656 2657 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2658 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); 2659 return -EINVAL; 2660 } 2661 2662 ndm = nlmsg_data(nlh); 2663 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || 2664 ndm->ndm_state || ndm->ndm_type) { 2665 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); 2666 return -EINVAL; 2667 } 2668 2669 if (ndm->ndm_flags & ~NTF_PROXY) { 2670 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); 2671 return -EINVAL; 2672 } 2673 2674 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), 2675 tb, NDA_MAX, nda_policy, 2676 extack); 2677 } else { 2678 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb, 2679 NDA_MAX, nda_policy, extack); 2680 } 2681 if (err < 0) 2682 return err; 2683 2684 for (i = 0; i <= NDA_MAX; ++i) { 2685 if (!tb[i]) 2686 continue; 2687 2688 /* all new attributes should require strict_check */ 2689 switch (i) { 2690 case NDA_IFINDEX: 2691 filter->dev_idx = nla_get_u32(tb[i]); 2692 break; 2693 case NDA_MASTER: 2694 filter->master_idx = nla_get_u32(tb[i]); 2695 break; 2696 default: 2697 if (strict_check) { 2698 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request"); 2699 return -EINVAL; 2700 } 2701 } 2702 } 2703 2704 return 0; 2705 } 2706 2707 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2708 { 2709 const struct nlmsghdr *nlh = cb->nlh; 2710 struct neigh_dump_filter filter = {}; 2711 struct neigh_table *tbl; 2712 int t, family, s_t; 2713 int proxy = 0; 2714 int err; 2715 2716 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2717 2718 /* check for full ndmsg structure presence, family member is 2719 * the same for both structures 2720 */ 2721 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && 2722 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) 2723 proxy = 1; 2724 2725 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); 2726 if (err < 0 && cb->strict_check) 2727 return err; 2728 2729 s_t = cb->args[0]; 2730 2731 for (t = 0; t < NEIGH_NR_TABLES; t++) { 2732 tbl = neigh_tables[t]; 2733 2734 if (!tbl) 2735 continue; 2736 if (t < s_t || (family && tbl->family != family)) 2737 continue; 2738 if (t > s_t) 2739 memset(&cb->args[1], 0, sizeof(cb->args) - 2740 sizeof(cb->args[0])); 2741 if (proxy) 2742 err = pneigh_dump_table(tbl, skb, cb, &filter); 2743 else 2744 err = neigh_dump_table(tbl, skb, cb, &filter); 2745 if (err < 0) 2746 break; 2747 } 2748 2749 
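	/* Save the dump cursor for the next callback invocation: args[0] is
	 * the table index, neigh_dump_table() resumes from args[1]/args[2],
	 * and pneigh_dump_table() from args[3]/args[4].
	 */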
cb->args[0] = t; 2750 return skb->len; 2751 } 2752 2753 static int neigh_valid_get_req(const struct nlmsghdr *nlh, 2754 struct neigh_table **tbl, 2755 void **dst, int *dev_idx, u8 *ndm_flags, 2756 struct netlink_ext_ack *extack) 2757 { 2758 struct nlattr *tb[NDA_MAX + 1]; 2759 struct ndmsg *ndm; 2760 int err, i; 2761 2762 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2763 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request"); 2764 return -EINVAL; 2765 } 2766 2767 ndm = nlmsg_data(nlh); 2768 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || 2769 ndm->ndm_type) { 2770 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request"); 2771 return -EINVAL; 2772 } 2773 2774 if (ndm->ndm_flags & ~NTF_PROXY) { 2775 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request"); 2776 return -EINVAL; 2777 } 2778 2779 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, 2780 NDA_MAX, nda_policy, extack); 2781 if (err < 0) 2782 return err; 2783 2784 *ndm_flags = ndm->ndm_flags; 2785 *dev_idx = ndm->ndm_ifindex; 2786 *tbl = neigh_find_table(ndm->ndm_family); 2787 if (*tbl == NULL) { 2788 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request"); 2789 return -EAFNOSUPPORT; 2790 } 2791 2792 for (i = 0; i <= NDA_MAX; ++i) { 2793 if (!tb[i]) 2794 continue; 2795 2796 switch (i) { 2797 case NDA_DST: 2798 if (nla_len(tb[i]) != (int)(*tbl)->key_len) { 2799 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request"); 2800 return -EINVAL; 2801 } 2802 *dst = nla_data(tb[i]); 2803 break; 2804 default: 2805 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request"); 2806 return -EINVAL; 2807 } 2808 } 2809 2810 return 0; 2811 } 2812 2813 static inline size_t neigh_nlmsg_size(void) 2814 { 2815 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2816 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2817 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ 2818 + nla_total_size(sizeof(struct nda_cacheinfo)) 2819 + nla_total_size(4) /* NDA_PROBES */ 2820 + nla_total_size(1); /* NDA_PROTOCOL */ 2821 } 2822 2823 static int neigh_get_reply(struct net *net, struct neighbour *neigh, 2824 u32 pid, u32 seq) 2825 { 2826 struct sk_buff *skb; 2827 int err = 0; 2828 2829 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL); 2830 if (!skb) 2831 return -ENOBUFS; 2832 2833 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0); 2834 if (err) { 2835 kfree_skb(skb); 2836 goto errout; 2837 } 2838 2839 err = rtnl_unicast(skb, net, pid); 2840 errout: 2841 return err; 2842 } 2843 2844 static inline size_t pneigh_nlmsg_size(void) 2845 { 2846 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2847 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2848 + nla_total_size(1); /* NDA_PROTOCOL */ 2849 } 2850 2851 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh, 2852 u32 pid, u32 seq, struct neigh_table *tbl) 2853 { 2854 struct sk_buff *skb; 2855 int err = 0; 2856 2857 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL); 2858 if (!skb) 2859 return -ENOBUFS; 2860 2861 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl); 2862 if (err) { 2863 kfree_skb(skb); 2864 goto errout; 2865 } 2866 2867 err = rtnl_unicast(skb, net, pid); 2868 errout: 2869 return err; 2870 } 2871 2872 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2873 struct netlink_ext_ack *extack) 2874 { 2875 struct net *net = sock_net(in_skb->sk); 2876 struct net_device *dev = NULL; 2877 struct neigh_table *tbl = NULL; 2878 struct neighbour *neigh; 2879 void 
*dst = NULL; 2880 u8 ndm_flags = 0; 2881 int dev_idx = 0; 2882 int err; 2883 2884 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags, 2885 extack); 2886 if (err < 0) 2887 return err; 2888 2889 if (dev_idx) { 2890 dev = __dev_get_by_index(net, dev_idx); 2891 if (!dev) { 2892 NL_SET_ERR_MSG(extack, "Unknown device ifindex"); 2893 return -ENODEV; 2894 } 2895 } 2896 2897 if (!dst) { 2898 NL_SET_ERR_MSG(extack, "Network address not specified"); 2899 return -EINVAL; 2900 } 2901 2902 if (ndm_flags & NTF_PROXY) { 2903 struct pneigh_entry *pn; 2904 2905 pn = pneigh_lookup(tbl, net, dst, dev, 0); 2906 if (!pn) { 2907 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found"); 2908 return -ENOENT; 2909 } 2910 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid, 2911 nlh->nlmsg_seq, tbl); 2912 } 2913 2914 if (!dev) { 2915 NL_SET_ERR_MSG(extack, "No device specified"); 2916 return -EINVAL; 2917 } 2918 2919 neigh = neigh_lookup(tbl, dst, dev); 2920 if (!neigh) { 2921 NL_SET_ERR_MSG(extack, "Neighbour entry not found"); 2922 return -ENOENT; 2923 } 2924 2925 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid, 2926 nlh->nlmsg_seq); 2927 2928 neigh_release(neigh); 2929 2930 return err; 2931 } 2932 2933 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2934 { 2935 int chain; 2936 struct neigh_hash_table *nht; 2937 2938 rcu_read_lock_bh(); 2939 nht = rcu_dereference_bh(tbl->nht); 2940 2941 read_lock(&tbl->lock); /* avoid resizes */ 2942 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2943 struct neighbour *n; 2944 2945 for (n = rcu_dereference_bh(nht->hash_buckets[chain]); 2946 n != NULL; 2947 n = rcu_dereference_bh(n->next)) 2948 cb(n, cookie); 2949 } 2950 read_unlock(&tbl->lock); 2951 rcu_read_unlock_bh(); 2952 } 2953 EXPORT_SYMBOL(neigh_for_each); 2954 2955 /* The tbl->lock must be held as a writer and BH disabled. 
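 *
 * A minimal usage sketch (my_release_cb is an illustrative name, not a
 * function defined here):
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, my_release_cb);
 *	write_unlock_bh(&tbl->lock);
 *
 * where my_release_cb(n) returns nonzero for entries that should be
 * unlinked from the hash and released.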
*/ 2956 void __neigh_for_each_release(struct neigh_table *tbl, 2957 int (*cb)(struct neighbour *)) 2958 { 2959 int chain; 2960 struct neigh_hash_table *nht; 2961 2962 nht = rcu_dereference_protected(tbl->nht, 2963 lockdep_is_held(&tbl->lock)); 2964 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2965 struct neighbour *n; 2966 struct neighbour __rcu **np; 2967 2968 np = &nht->hash_buckets[chain]; 2969 while ((n = rcu_dereference_protected(*np, 2970 lockdep_is_held(&tbl->lock))) != NULL) { 2971 int release; 2972 2973 write_lock(&n->lock); 2974 release = cb(n); 2975 if (release) { 2976 rcu_assign_pointer(*np, 2977 rcu_dereference_protected(n->next, 2978 lockdep_is_held(&tbl->lock))); 2979 neigh_mark_dead(n); 2980 } else 2981 np = &n->next; 2982 write_unlock(&n->lock); 2983 if (release) 2984 neigh_cleanup_and_release(n); 2985 } 2986 } 2987 } 2988 EXPORT_SYMBOL(__neigh_for_each_release); 2989 2990 int neigh_xmit(int index, struct net_device *dev, 2991 const void *addr, struct sk_buff *skb) 2992 { 2993 int err = -EAFNOSUPPORT; 2994 if (likely(index < NEIGH_NR_TABLES)) { 2995 struct neigh_table *tbl; 2996 struct neighbour *neigh; 2997 2998 tbl = neigh_tables[index]; 2999 if (!tbl) 3000 goto out; 3001 rcu_read_lock_bh(); 3002 if (index == NEIGH_ARP_TABLE) { 3003 u32 key = *((u32 *)addr); 3004 3005 neigh = __ipv4_neigh_lookup_noref(dev, key); 3006 } else { 3007 neigh = __neigh_lookup_noref(tbl, addr, dev); 3008 } 3009 if (!neigh) 3010 neigh = __neigh_create(tbl, addr, dev, false); 3011 err = PTR_ERR(neigh); 3012 if (IS_ERR(neigh)) { 3013 rcu_read_unlock_bh(); 3014 goto out_kfree_skb; 3015 } 3016 err = neigh->output(neigh, skb); 3017 rcu_read_unlock_bh(); 3018 } 3019 else if (index == NEIGH_LINK_TABLE) { 3020 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 3021 addr, NULL, skb->len); 3022 if (err < 0) 3023 goto out_kfree_skb; 3024 err = dev_queue_xmit(skb); 3025 } 3026 out: 3027 return err; 3028 out_kfree_skb: 3029 kfree_skb(skb); 3030 goto out; 3031 } 3032 EXPORT_SYMBOL(neigh_xmit); 3033 3034 #ifdef CONFIG_PROC_FS 3035 3036 static struct neighbour *neigh_get_first(struct seq_file *seq) 3037 { 3038 struct neigh_seq_state *state = seq->private; 3039 struct net *net = seq_file_net(seq); 3040 struct neigh_hash_table *nht = state->nht; 3041 struct neighbour *n = NULL; 3042 int bucket; 3043 3044 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 3045 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { 3046 n = rcu_dereference_bh(nht->hash_buckets[bucket]); 3047 3048 while (n) { 3049 if (!net_eq(dev_net(n->dev), net)) 3050 goto next; 3051 if (state->neigh_sub_iter) { 3052 loff_t fakep = 0; 3053 void *v; 3054 3055 v = state->neigh_sub_iter(state, n, &fakep); 3056 if (!v) 3057 goto next; 3058 } 3059 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 3060 break; 3061 if (n->nud_state & ~NUD_NOARP) 3062 break; 3063 next: 3064 n = rcu_dereference_bh(n->next); 3065 } 3066 3067 if (n) 3068 break; 3069 } 3070 state->bucket = bucket; 3071 3072 return n; 3073 } 3074 3075 static struct neighbour *neigh_get_next(struct seq_file *seq, 3076 struct neighbour *n, 3077 loff_t *pos) 3078 { 3079 struct neigh_seq_state *state = seq->private; 3080 struct net *net = seq_file_net(seq); 3081 struct neigh_hash_table *nht = state->nht; 3082 3083 if (state->neigh_sub_iter) { 3084 void *v = state->neigh_sub_iter(state, n, pos); 3085 if (v) 3086 return n; 3087 } 3088 n = rcu_dereference_bh(n->next); 3089 3090 while (1) { 3091 while (n) { 3092 if (!net_eq(dev_net(n->dev), net)) 3093 goto next; 3094 if (state->neigh_sub_iter) { 
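			/* neigh_sub_iter lets a protocol expand one neighbour
			 * into several seq records; keep yielding from the
			 * current entry until its sub-iterator is exhausted
			 * before advancing to the next one.
			 */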
3095 void *v = state->neigh_sub_iter(state, n, pos); 3096 if (v) 3097 return n; 3098 goto next; 3099 } 3100 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 3101 break; 3102 3103 if (n->nud_state & ~NUD_NOARP) 3104 break; 3105 next: 3106 n = rcu_dereference_bh(n->next); 3107 } 3108 3109 if (n) 3110 break; 3111 3112 if (++state->bucket >= (1 << nht->hash_shift)) 3113 break; 3114 3115 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); 3116 } 3117 3118 if (n && pos) 3119 --(*pos); 3120 return n; 3121 } 3122 3123 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) 3124 { 3125 struct neighbour *n = neigh_get_first(seq); 3126 3127 if (n) { 3128 --(*pos); 3129 while (*pos) { 3130 n = neigh_get_next(seq, n, pos); 3131 if (!n) 3132 break; 3133 } 3134 } 3135 return *pos ? NULL : n; 3136 } 3137 3138 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 3139 { 3140 struct neigh_seq_state *state = seq->private; 3141 struct net *net = seq_file_net(seq); 3142 struct neigh_table *tbl = state->tbl; 3143 struct pneigh_entry *pn = NULL; 3144 int bucket; 3145 3146 state->flags |= NEIGH_SEQ_IS_PNEIGH; 3147 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 3148 pn = tbl->phash_buckets[bucket]; 3149 while (pn && !net_eq(pneigh_net(pn), net)) 3150 pn = pn->next; 3151 if (pn) 3152 break; 3153 } 3154 state->bucket = bucket; 3155 3156 return pn; 3157 } 3158 3159 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, 3160 struct pneigh_entry *pn, 3161 loff_t *pos) 3162 { 3163 struct neigh_seq_state *state = seq->private; 3164 struct net *net = seq_file_net(seq); 3165 struct neigh_table *tbl = state->tbl; 3166 3167 do { 3168 pn = pn->next; 3169 } while (pn && !net_eq(pneigh_net(pn), net)); 3170 3171 while (!pn) { 3172 if (++state->bucket > PNEIGH_HASHMASK) 3173 break; 3174 pn = tbl->phash_buckets[state->bucket]; 3175 while (pn && !net_eq(pneigh_net(pn), net)) 3176 pn = pn->next; 3177 if (pn) 3178 break; 3179 } 3180 3181 if (pn && pos) 3182 --(*pos); 3183 3184 return pn; 3185 } 3186 3187 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) 3188 { 3189 struct pneigh_entry *pn = pneigh_get_first(seq); 3190 3191 if (pn) { 3192 --(*pos); 3193 while (*pos) { 3194 pn = pneigh_get_next(seq, pn, pos); 3195 if (!pn) 3196 break; 3197 } 3198 } 3199 return *pos ? NULL : pn; 3200 } 3201 3202 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) 3203 { 3204 struct neigh_seq_state *state = seq->private; 3205 void *rc; 3206 loff_t idxpos = *pos; 3207 3208 rc = neigh_get_idx(seq, &idxpos); 3209 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3210 rc = pneigh_get_idx(seq, &idxpos); 3211 3212 return rc; 3213 } 3214 3215 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 3216 __acquires(tbl->lock) 3217 __acquires(rcu_bh) 3218 { 3219 struct neigh_seq_state *state = seq->private; 3220 3221 state->tbl = tbl; 3222 state->bucket = 0; 3223 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 3224 3225 rcu_read_lock_bh(); 3226 state->nht = rcu_dereference_bh(tbl->nht); 3227 read_lock(&tbl->lock); 3228 3229 return *pos ? 
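	       /* nonzero *pos: resume from the saved cursor;
	        * zero: emit the SEQ_START_TOKEN header record first */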
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 3230 } 3231 EXPORT_SYMBOL(neigh_seq_start); 3232 3233 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3234 { 3235 struct neigh_seq_state *state; 3236 void *rc; 3237 3238 if (v == SEQ_START_TOKEN) { 3239 rc = neigh_get_first(seq); 3240 goto out; 3241 } 3242 3243 state = seq->private; 3244 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { 3245 rc = neigh_get_next(seq, v, NULL); 3246 if (rc) 3247 goto out; 3248 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3249 rc = pneigh_get_first(seq); 3250 } else { 3251 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); 3252 rc = pneigh_get_next(seq, v, NULL); 3253 } 3254 out: 3255 ++(*pos); 3256 return rc; 3257 } 3258 EXPORT_SYMBOL(neigh_seq_next); 3259 3260 void neigh_seq_stop(struct seq_file *seq, void *v) 3261 __releases(tbl->lock) 3262 __releases(rcu_bh) 3263 { 3264 struct neigh_seq_state *state = seq->private; 3265 struct neigh_table *tbl = state->tbl; 3266 3267 read_unlock(&tbl->lock); 3268 rcu_read_unlock_bh(); 3269 } 3270 EXPORT_SYMBOL(neigh_seq_stop); 3271 3272 /* statistics via seq_file */ 3273 3274 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 3275 { 3276 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3277 int cpu; 3278 3279 if (*pos == 0) 3280 return SEQ_START_TOKEN; 3281 3282 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 3283 if (!cpu_possible(cpu)) 3284 continue; 3285 *pos = cpu+1; 3286 return per_cpu_ptr(tbl->stats, cpu); 3287 } 3288 return NULL; 3289 } 3290 3291 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3292 { 3293 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3294 int cpu; 3295 3296 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 3297 if (!cpu_possible(cpu)) 3298 continue; 3299 *pos = cpu+1; 3300 return per_cpu_ptr(tbl->stats, cpu); 3301 } 3302 (*pos)++; 3303 return NULL; 3304 } 3305 3306 static void neigh_stat_seq_stop(struct seq_file *seq, void *v) 3307 { 3308 3309 } 3310 3311 static int neigh_stat_seq_show(struct seq_file *seq, void *v) 3312 { 3313 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3314 struct neigh_statistics *st = v; 3315 3316 if (v == SEQ_START_TOKEN) { 3317 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); 3318 return 0; 3319 } 3320 3321 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " 3322 "%08lx %08lx %08lx %08lx %08lx %08lx\n", 3323 atomic_read(&tbl->entries), 3324 3325 st->allocs, 3326 st->destroys, 3327 st->hash_grows, 3328 3329 st->lookups, 3330 st->hits, 3331 3332 st->res_failed, 3333 3334 st->rcv_probes_mcast, 3335 st->rcv_probes_ucast, 3336 3337 st->periodic_gc_runs, 3338 st->forced_gc_runs, 3339 st->unres_discards, 3340 st->table_fulls 3341 ); 3342 3343 return 0; 3344 } 3345 3346 static const struct seq_operations neigh_stat_seq_ops = { 3347 .start = neigh_stat_seq_start, 3348 .next = neigh_stat_seq_next, 3349 .stop = neigh_stat_seq_stop, 3350 .show = neigh_stat_seq_show, 3351 }; 3352 #endif /* CONFIG_PROC_FS */ 3353 3354 static void __neigh_notify(struct neighbour *n, int type, int flags, 3355 u32 pid) 3356 { 3357 struct net *net = dev_net(n->dev); 3358 struct sk_buff *skb; 3359 int err = -ENOBUFS; 3360 3361 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); 3362 if (skb == NULL) 3363 goto errout; 3364 3365 err = neigh_fill_info(skb, n, pid, 0, type, flags); 3366 if (err < 0) { 3367 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ 
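		/* Unlike the dump path, where -EMSGSIZE simply ends the
		 * current skb, a notification skb is sized up front via
		 * neigh_nlmsg_size(), so -EMSGSIZE here means that estimate
		 * has gone stale. */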
3368 WARN_ON(err == -EMSGSIZE); 3369 kfree_skb(skb); 3370 goto errout; 3371 } 3372 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 3373 return; 3374 errout: 3375 if (err < 0) 3376 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 3377 } 3378 3379 void neigh_app_ns(struct neighbour *n) 3380 { 3381 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); 3382 } 3383 EXPORT_SYMBOL(neigh_app_ns); 3384 3385 #ifdef CONFIG_SYSCTL 3386 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); 3387 3388 static int proc_unres_qlen(struct ctl_table *ctl, int write, 3389 void *buffer, size_t *lenp, loff_t *ppos) 3390 { 3391 int size, ret; 3392 struct ctl_table tmp = *ctl; 3393 3394 tmp.extra1 = SYSCTL_ZERO; 3395 tmp.extra2 = &unres_qlen_max; 3396 tmp.data = &size; 3397 3398 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); 3399 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3400 3401 if (write && !ret) 3402 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); 3403 return ret; 3404 } 3405 3406 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, 3407 int family) 3408 { 3409 switch (family) { 3410 case AF_INET: 3411 return __in_dev_arp_parms_get_rcu(dev); 3412 case AF_INET6: 3413 return __in6_dev_nd_parms_get_rcu(dev); 3414 } 3415 return NULL; 3416 } 3417 3418 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, 3419 int index) 3420 { 3421 struct net_device *dev; 3422 int family = neigh_parms_family(p); 3423 3424 rcu_read_lock(); 3425 for_each_netdev_rcu(net, dev) { 3426 struct neigh_parms *dst_p = 3427 neigh_get_dev_parms_rcu(dev, family); 3428 3429 if (dst_p && !test_bit(index, dst_p->data_state)) 3430 dst_p->data[index] = p->data[index]; 3431 } 3432 rcu_read_unlock(); 3433 } 3434 3435 static void neigh_proc_update(struct ctl_table *ctl, int write) 3436 { 3437 struct net_device *dev = ctl->extra1; 3438 struct neigh_parms *p = ctl->extra2; 3439 struct net *net = neigh_parms_net(p); 3440 int index = (int *) ctl->data - p->data; 3441 3442 if (!write) 3443 return; 3444 3445 set_bit(index, p->data_state); 3446 if (index == NEIGH_VAR_DELAY_PROBE_TIME) 3447 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 3448 if (!dev) /* NULL dev means this is default value */ 3449 neigh_copy_dflt_parms(net, p, index); 3450 } 3451 3452 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, 3453 void *buffer, size_t *lenp, 3454 loff_t *ppos) 3455 { 3456 struct ctl_table tmp = *ctl; 3457 int ret; 3458 3459 tmp.extra1 = SYSCTL_ZERO; 3460 tmp.extra2 = SYSCTL_INT_MAX; 3461 3462 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3463 neigh_proc_update(ctl, write); 3464 return ret; 3465 } 3466 3467 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, 3468 size_t *lenp, loff_t *ppos) 3469 { 3470 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 3471 3472 neigh_proc_update(ctl, write); 3473 return ret; 3474 } 3475 EXPORT_SYMBOL(neigh_proc_dointvec); 3476 3477 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, 3478 size_t *lenp, loff_t *ppos) 3479 { 3480 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3481 3482 neigh_proc_update(ctl, write); 3483 return ret; 3484 } 3485 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); 3486 3487 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, 3488 void *buffer, size_t *lenp, 3489 loff_t *ppos) 3490 { 3491 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); 3492 3493 
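	/* On a write, this also marks the value as explicitly set in
	 * p->data_state, so neigh_copy_dflt_parms() will no longer let the
	 * "default" parms overwrite it.
	 */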
neigh_proc_update(ctl, write); 3494 return ret; 3495 } 3496 3497 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, 3498 void *buffer, size_t *lenp, loff_t *ppos) 3499 { 3500 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3501 3502 neigh_proc_update(ctl, write); 3503 return ret; 3504 } 3505 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); 3506 3507 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, 3508 void *buffer, size_t *lenp, 3509 loff_t *ppos) 3510 { 3511 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); 3512 3513 neigh_proc_update(ctl, write); 3514 return ret; 3515 } 3516 3517 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, 3518 void *buffer, size_t *lenp, 3519 loff_t *ppos) 3520 { 3521 struct neigh_parms *p = ctl->extra2; 3522 int ret; 3523 3524 if (strcmp(ctl->procname, "base_reachable_time") == 0) 3525 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3526 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) 3527 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3528 else 3529 ret = -1; 3530 3531 if (write && ret == 0) { 3532 /* update reachable_time as well, otherwise, the change will 3533 * only be effective after the next time neigh_periodic_work 3534 * decides to recompute it 3535 */ 3536 p->reachable_time = 3537 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 3538 } 3539 return ret; 3540 } 3541 3542 #define NEIGH_PARMS_DATA_OFFSET(index) \ 3543 (&((struct neigh_parms *) 0)->data[index]) 3544 3545 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ 3546 [NEIGH_VAR_ ## attr] = { \ 3547 .procname = name, \ 3548 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ 3549 .maxlen = sizeof(int), \ 3550 .mode = mval, \ 3551 .proc_handler = proc, \ 3552 } 3553 3554 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ 3555 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) 3556 3557 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ 3558 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) 3559 3560 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ 3561 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) 3562 3563 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ 3564 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 3565 3566 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ 3567 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) 3568 3569 static struct neigh_sysctl_table { 3570 struct ctl_table_header *sysctl_header; 3571 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; 3572 } neigh_sysctl_template __read_mostly = { 3573 .neigh_vars = { 3574 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), 3575 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), 3576 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), 3577 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), 3578 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), 3579 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), 3580 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), 3581 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), 3582 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), 3583 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), 3584 
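		/* For reference, the first entry below expands via
		 * NEIGH_SYSCTL_ENTRY to roughly:
		 *
		 *	[NEIGH_VAR_ANYCAST_DELAY] = {
		 *		.procname	= "anycast_delay",
		 *		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ANYCAST_DELAY),
		 *		.maxlen		= sizeof(int),
		 *		.mode		= 0644,
		 *		.proc_handler	= neigh_proc_dointvec_userhz_jiffies,
		 *	}
		 *
		 * .data starts out as a bare offset into struct neigh_parms
		 * and is rebased onto a real parms instance in
		 * neigh_sysctl_register().
		 */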
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), 3585 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), 3586 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), 3587 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), 3588 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), 3589 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), 3590 [NEIGH_VAR_GC_INTERVAL] = { 3591 .procname = "gc_interval", 3592 .maxlen = sizeof(int), 3593 .mode = 0644, 3594 .proc_handler = proc_dointvec_jiffies, 3595 }, 3596 [NEIGH_VAR_GC_THRESH1] = { 3597 .procname = "gc_thresh1", 3598 .maxlen = sizeof(int), 3599 .mode = 0644, 3600 .extra1 = SYSCTL_ZERO, 3601 .extra2 = SYSCTL_INT_MAX, 3602 .proc_handler = proc_dointvec_minmax, 3603 }, 3604 [NEIGH_VAR_GC_THRESH2] = { 3605 .procname = "gc_thresh2", 3606 .maxlen = sizeof(int), 3607 .mode = 0644, 3608 .extra1 = SYSCTL_ZERO, 3609 .extra2 = SYSCTL_INT_MAX, 3610 .proc_handler = proc_dointvec_minmax, 3611 }, 3612 [NEIGH_VAR_GC_THRESH3] = { 3613 .procname = "gc_thresh3", 3614 .maxlen = sizeof(int), 3615 .mode = 0644, 3616 .extra1 = SYSCTL_ZERO, 3617 .extra2 = SYSCTL_INT_MAX, 3618 .proc_handler = proc_dointvec_minmax, 3619 }, 3620 {}, 3621 }, 3622 }; 3623 3624 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3625 proc_handler *handler) 3626 { 3627 int i; 3628 struct neigh_sysctl_table *t; 3629 const char *dev_name_source; 3630 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3631 char *p_name; 3632 3633 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3634 if (!t) 3635 goto err; 3636 3637 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { 3638 t->neigh_vars[i].data += (long) p; 3639 t->neigh_vars[i].extra1 = dev; 3640 t->neigh_vars[i].extra2 = p; 3641 } 3642 3643 if (dev) { 3644 dev_name_source = dev->name; 3645 /* Terminate the table early */ 3646 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, 3647 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); 3648 } else { 3649 struct neigh_table *tbl = p->tbl; 3650 dev_name_source = "default"; 3651 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; 3652 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; 3653 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; 3654 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; 3655 } 3656 3657 if (handler) { 3658 /* RetransTime */ 3659 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3660 /* ReachableTime */ 3661 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3662 /* RetransTime (in milliseconds)*/ 3663 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3664 /* ReachableTime (in milliseconds) */ 3665 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3666 } else { 3667 /* Those handlers will update p->reachable_time after 3668 * base_reachable_time(_ms) is set to ensure the new timer starts being 3669 * applied after the next neighbour update instead of waiting for 3670 * neigh_periodic_work to update its value (can be multiple minutes) 3671 * So any handler that replaces them should do this as well 3672 */ 3673 /* ReachableTime */ 3674 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = 3675 neigh_proc_base_reachable_time; 3676 /* ReachableTime (in milliseconds) */ 3677 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = 3678 neigh_proc_base_reachable_time; 3679 } 3680 3681 
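	/* The table is registered under net/<family>/neigh/<ifname|default>;
	 * e.g. net/ipv4/neigh/eth0/retrans_time ("eth0" being just an example
	 * device name) - see the snprintf() below.
	 */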
/* Don't export sysctls to unprivileged users */ 3682 if (neigh_parms_net(p)->user_ns != &init_user_ns) 3683 t->neigh_vars[0].procname = NULL; 3684 3685 switch (neigh_parms_family(p)) { 3686 case AF_INET: 3687 p_name = "ipv4"; 3688 break; 3689 case AF_INET6: 3690 p_name = "ipv6"; 3691 break; 3692 default: 3693 BUG(); 3694 } 3695 3696 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", 3697 p_name, dev_name_source); 3698 t->sysctl_header = 3699 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); 3700 if (!t->sysctl_header) 3701 goto free; 3702 3703 p->sysctl_table = t; 3704 return 0; 3705 3706 free: 3707 kfree(t); 3708 err: 3709 return -ENOBUFS; 3710 } 3711 EXPORT_SYMBOL(neigh_sysctl_register); 3712 3713 void neigh_sysctl_unregister(struct neigh_parms *p) 3714 { 3715 if (p->sysctl_table) { 3716 struct neigh_sysctl_table *t = p->sysctl_table; 3717 p->sysctl_table = NULL; 3718 unregister_net_sysctl_table(t->sysctl_header); 3719 kfree(t); 3720 } 3721 } 3722 EXPORT_SYMBOL(neigh_sysctl_unregister); 3723 3724 #endif /* CONFIG_SYSCTL */ 3725 3726 static int __init neigh_init(void) 3727 { 3728 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); 3729 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); 3730 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0); 3731 3732 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 3733 0); 3734 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); 3735 3736 return 0; 3737 } 3738 3739 subsys_initcall(neigh_init); 3740