1 /* 2 * Generic address resolution entity 3 * 4 * Authors: 5 * Pedro Roque <roque@di.fc.ul.pt> 6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 * 13 * Fixes: 14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. 15 * Harald Welte Add neighbour cache statistics like rtstat 16 */ 17 18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 19 20 #include <linux/slab.h> 21 #include <linux/types.h> 22 #include <linux/kernel.h> 23 #include <linux/module.h> 24 #include <linux/socket.h> 25 #include <linux/netdevice.h> 26 #include <linux/proc_fs.h> 27 #ifdef CONFIG_SYSCTL 28 #include <linux/sysctl.h> 29 #endif 30 #include <linux/times.h> 31 #include <net/net_namespace.h> 32 #include <net/neighbour.h> 33 #include <net/dst.h> 34 #include <net/sock.h> 35 #include <net/netevent.h> 36 #include <net/netlink.h> 37 #include <linux/rtnetlink.h> 38 #include <linux/random.h> 39 #include <linux/string.h> 40 #include <linux/log2.h> 41 #include <linux/inetdevice.h> 42 #include <net/addrconf.h> 43 44 #define DEBUG 45 #define NEIGH_DEBUG 1 46 #define neigh_dbg(level, fmt, ...) \ 47 do { \ 48 if (level <= NEIGH_DEBUG) \ 49 pr_debug(fmt, ##__VA_ARGS__); \ 50 } while (0) 51 52 #define PNEIGH_HASHMASK 0xF 53 54 static void neigh_timer_handler(struct timer_list *t); 55 static void __neigh_notify(struct neighbour *n, int type, int flags, 56 u32 pid); 57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); 58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, 59 struct net_device *dev); 60 61 #ifdef CONFIG_PROC_FS 62 static const struct seq_operations neigh_stat_seq_ops; 63 #endif 64 65 /* 66 Neighbour hash table buckets are protected with rwlock tbl->lock. 67 68 - All the scans/updates to hash buckets MUST be made under this lock. 69 - NOTHING clever should be made under this lock: no callbacks 70 to protocol backends, no attempts to send something to network. 71 It will result in deadlocks, if backend/driver wants to use neighbour 72 cache. 73 - If the entry requires some non-trivial actions, increase 74 its reference count and release table lock. 75 76 Neighbour entries are protected: 77 - with reference count. 78 - with rwlock neigh->lock 79 80 Reference count prevents destruction. 81 82 neigh->lock mainly serializes ll address data and its validity state. 83 However, the same lock is used to protect another entry fields: 84 - timer 85 - resolution queue 86 87 Again, nothing clever shall be made under neigh->lock, 88 the most complicated procedure, which we allow is dev->hard_header. 89 It is supposed, that dev->hard_header is simplistic and does 90 not make callbacks to neighbour tables. 91 */ 92 93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) 94 { 95 kfree_skb(skb); 96 return -ENETDOWN; 97 } 98 99 static void neigh_cleanup_and_release(struct neighbour *neigh) 100 { 101 if (neigh->parms->neigh_cleanup) 102 neigh->parms->neigh_cleanup(neigh); 103 104 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); 105 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 106 neigh_release(neigh); 107 } 108 109 /* 110 * It is random distribution in the interval (1/2)*base...(3/2)*base. 111 * It corresponds to default IPv6 settings and is not overridable, 112 * because it is really reasonable choice. 113 */ 114 115 unsigned long neigh_rand_reach_time(unsigned long base) 116 { 117 return base ? (prandom_u32() % base) + (base >> 1) : 0; 118 } 119 EXPORT_SYMBOL(neigh_rand_reach_time); 120 121 static void neigh_mark_dead(struct neighbour *n) 122 { 123 n->dead = 1; 124 if (!list_empty(&n->gc_list)) { 125 list_del_init(&n->gc_list); 126 atomic_dec(&n->tbl->gc_entries); 127 } 128 } 129 130 static void neigh_update_gc_list(struct neighbour *n) 131 { 132 bool on_gc_list, exempt_from_gc; 133 134 write_lock_bh(&n->tbl->lock); 135 write_lock(&n->lock); 136 137 /* remove from the gc list if new state is permanent or if neighbor 138 * is externally learned; otherwise entry should be on the gc list 139 */ 140 exempt_from_gc = n->nud_state & NUD_PERMANENT || 141 n->flags & NTF_EXT_LEARNED; 142 on_gc_list = !list_empty(&n->gc_list); 143 144 if (exempt_from_gc && on_gc_list) { 145 list_del_init(&n->gc_list); 146 atomic_dec(&n->tbl->gc_entries); 147 } else if (!exempt_from_gc && !on_gc_list) { 148 /* add entries to the tail; cleaning removes from the front */ 149 list_add_tail(&n->gc_list, &n->tbl->gc_list); 150 atomic_inc(&n->tbl->gc_entries); 151 } 152 153 write_unlock(&n->lock); 154 write_unlock_bh(&n->tbl->lock); 155 } 156 157 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, 158 int *notify) 159 { 160 bool rc = false; 161 u8 ndm_flags; 162 163 if (!(flags & NEIGH_UPDATE_F_ADMIN)) 164 return rc; 165 166 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; 167 if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { 168 if (ndm_flags & NTF_EXT_LEARNED) 169 neigh->flags |= NTF_EXT_LEARNED; 170 else 171 neigh->flags &= ~NTF_EXT_LEARNED; 172 rc = true; 173 *notify = 1; 174 } 175 176 return rc; 177 } 178 179 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, 180 struct neigh_table *tbl) 181 { 182 bool retval = false; 183 184 write_lock(&n->lock); 185 if (refcount_read(&n->refcnt) == 1) { 186 struct neighbour *neigh; 187 188 neigh = rcu_dereference_protected(n->next, 189 lockdep_is_held(&tbl->lock)); 190 rcu_assign_pointer(*np, neigh); 191 neigh_mark_dead(n); 192 retval = true; 193 } 194 write_unlock(&n->lock); 195 if (retval) 196 neigh_cleanup_and_release(n); 197 return retval; 198 } 199 200 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) 201 { 202 struct neigh_hash_table *nht; 203 void *pkey = ndel->primary_key; 204 u32 hash_val; 205 struct neighbour *n; 206 struct neighbour __rcu **np; 207 208 nht = rcu_dereference_protected(tbl->nht, 209 lockdep_is_held(&tbl->lock)); 210 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); 211 hash_val = hash_val >> (32 - nht->hash_shift); 212 213 np = &nht->hash_buckets[hash_val]; 214 while ((n = rcu_dereference_protected(*np, 215 lockdep_is_held(&tbl->lock)))) { 216 if (n == ndel) 217 return neigh_del(n, np, tbl); 218 np = &n->next; 219 } 220 return false; 221 } 222 223 static int neigh_forced_gc(struct neigh_table *tbl) 224 { 225 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2; 226 unsigned long tref = jiffies - 5 * HZ; 227 struct neighbour *n, *tmp; 228 int shrunk = 0; 229 230 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); 231 232 write_lock_bh(&tbl->lock); 233 234 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) { 235 if (refcount_read(&n->refcnt) == 1) { 236 bool remove = false; 237 238 write_lock(&n->lock); 239 if ((n->nud_state == NUD_FAILED) || 240 time_after(tref, n->updated)) 241 remove = true; 242 write_unlock(&n->lock); 243 244 if (remove && neigh_remove_one(n, tbl)) 245 shrunk++; 246 if (shrunk >= max_clean) 247 break; 248 } 249 } 250 251 tbl->last_flush = jiffies; 252 253 write_unlock_bh(&tbl->lock); 254 255 return shrunk; 256 } 257 258 static void neigh_add_timer(struct neighbour *n, unsigned long when) 259 { 260 neigh_hold(n); 261 if (unlikely(mod_timer(&n->timer, when))) { 262 printk("NEIGH: BUG, double timer add, state is %x\n", 263 n->nud_state); 264 dump_stack(); 265 } 266 } 267 268 static int neigh_del_timer(struct neighbour *n) 269 { 270 if ((n->nud_state & NUD_IN_TIMER) && 271 del_timer(&n->timer)) { 272 neigh_release(n); 273 return 1; 274 } 275 return 0; 276 } 277 278 static void pneigh_queue_purge(struct sk_buff_head *list) 279 { 280 struct sk_buff *skb; 281 282 while ((skb = skb_dequeue(list)) != NULL) { 283 dev_put(skb->dev); 284 kfree_skb(skb); 285 } 286 } 287 288 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, 289 bool skip_perm) 290 { 291 int i; 292 struct neigh_hash_table *nht; 293 294 nht = rcu_dereference_protected(tbl->nht, 295 lockdep_is_held(&tbl->lock)); 296 297 for (i = 0; i < (1 << nht->hash_shift); i++) { 298 struct neighbour *n; 299 struct neighbour __rcu **np = &nht->hash_buckets[i]; 300 301 while ((n = rcu_dereference_protected(*np, 302 lockdep_is_held(&tbl->lock))) != NULL) { 303 if (dev && n->dev != dev) { 304 np = &n->next; 305 continue; 306 } 307 if (skip_perm && n->nud_state & NUD_PERMANENT) { 308 np = &n->next; 309 continue; 310 } 311 rcu_assign_pointer(*np, 312 rcu_dereference_protected(n->next, 313 lockdep_is_held(&tbl->lock))); 314 write_lock(&n->lock); 315 neigh_del_timer(n); 316 neigh_mark_dead(n); 317 if (refcount_read(&n->refcnt) != 1) { 318 /* The most unpleasant situation. 319 We must destroy neighbour entry, 320 but someone still uses it. 321 322 The destroy will be delayed until 323 the last user releases us, but 324 we must kill timers etc. and move 325 it to safe state. 326 */ 327 __skb_queue_purge(&n->arp_queue); 328 n->arp_queue_len_bytes = 0; 329 n->output = neigh_blackhole; 330 if (n->nud_state & NUD_VALID) 331 n->nud_state = NUD_NOARP; 332 else 333 n->nud_state = NUD_NONE; 334 neigh_dbg(2, "neigh %p is stray\n", n); 335 } 336 write_unlock(&n->lock); 337 neigh_cleanup_and_release(n); 338 } 339 } 340 } 341 342 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) 343 { 344 write_lock_bh(&tbl->lock); 345 neigh_flush_dev(tbl, dev, false); 346 write_unlock_bh(&tbl->lock); 347 } 348 EXPORT_SYMBOL(neigh_changeaddr); 349 350 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, 351 bool skip_perm) 352 { 353 write_lock_bh(&tbl->lock); 354 neigh_flush_dev(tbl, dev, skip_perm); 355 pneigh_ifdown_and_unlock(tbl, dev); 356 357 del_timer_sync(&tbl->proxy_timer); 358 pneigh_queue_purge(&tbl->proxy_queue); 359 return 0; 360 } 361 362 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev) 363 { 364 __neigh_ifdown(tbl, dev, true); 365 return 0; 366 } 367 EXPORT_SYMBOL(neigh_carrier_down); 368 369 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) 370 { 371 __neigh_ifdown(tbl, dev, false); 372 return 0; 373 } 374 EXPORT_SYMBOL(neigh_ifdown); 375 376 static struct neighbour *neigh_alloc(struct neigh_table *tbl, 377 struct net_device *dev, 378 bool exempt_from_gc) 379 { 380 struct neighbour *n = NULL; 381 unsigned long now = jiffies; 382 int entries; 383 384 if (exempt_from_gc) 385 goto do_alloc; 386 387 entries = atomic_inc_return(&tbl->gc_entries) - 1; 388 if (entries >= tbl->gc_thresh3 || 389 (entries >= tbl->gc_thresh2 && 390 time_after(now, tbl->last_flush + 5 * HZ))) { 391 if (!neigh_forced_gc(tbl) && 392 entries >= tbl->gc_thresh3) { 393 net_info_ratelimited("%s: neighbor table overflow!\n", 394 tbl->id); 395 NEIGH_CACHE_STAT_INC(tbl, table_fulls); 396 goto out_entries; 397 } 398 } 399 400 do_alloc: 401 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); 402 if (!n) 403 goto out_entries; 404 405 __skb_queue_head_init(&n->arp_queue); 406 rwlock_init(&n->lock); 407 seqlock_init(&n->ha_lock); 408 n->updated = n->used = now; 409 n->nud_state = NUD_NONE; 410 n->output = neigh_blackhole; 411 seqlock_init(&n->hh.hh_lock); 412 n->parms = neigh_parms_clone(&tbl->parms); 413 timer_setup(&n->timer, neigh_timer_handler, 0); 414 415 NEIGH_CACHE_STAT_INC(tbl, allocs); 416 n->tbl = tbl; 417 refcount_set(&n->refcnt, 1); 418 n->dead = 1; 419 INIT_LIST_HEAD(&n->gc_list); 420 421 atomic_inc(&tbl->entries); 422 out: 423 return n; 424 425 out_entries: 426 if (!exempt_from_gc) 427 atomic_dec(&tbl->gc_entries); 428 goto out; 429 } 430 431 static void neigh_get_hash_rnd(u32 *x) 432 { 433 *x = get_random_u32() | 1; 434 } 435 436 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) 437 { 438 size_t size = (1 << shift) * sizeof(struct neighbour *); 439 struct neigh_hash_table *ret; 440 struct neighbour __rcu **buckets; 441 int i; 442 443 ret = kmalloc(sizeof(*ret), GFP_ATOMIC); 444 if (!ret) 445 return NULL; 446 if (size <= PAGE_SIZE) 447 buckets = kzalloc(size, GFP_ATOMIC); 448 else 449 buckets = (struct neighbour __rcu **) 450 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 451 get_order(size)); 452 if (!buckets) { 453 kfree(ret); 454 return NULL; 455 } 456 ret->hash_buckets = buckets; 457 ret->hash_shift = shift; 458 for (i = 0; i < NEIGH_NUM_HASH_RND; i++) 459 neigh_get_hash_rnd(&ret->hash_rnd[i]); 460 return ret; 461 } 462 463 static void neigh_hash_free_rcu(struct rcu_head *head) 464 { 465 struct neigh_hash_table *nht = container_of(head, 466 struct neigh_hash_table, 467 rcu); 468 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); 469 struct neighbour __rcu **buckets = nht->hash_buckets; 470 471 if (size <= PAGE_SIZE) 472 kfree(buckets); 473 else 474 free_pages((unsigned long)buckets, get_order(size)); 475 kfree(nht); 476 } 477 478 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, 479 unsigned long new_shift) 480 { 481 unsigned int i, hash; 482 struct neigh_hash_table *new_nht, *old_nht; 483 484 NEIGH_CACHE_STAT_INC(tbl, hash_grows); 485 486 old_nht = rcu_dereference_protected(tbl->nht, 487 lockdep_is_held(&tbl->lock)); 488 new_nht = neigh_hash_alloc(new_shift); 489 if (!new_nht) 490 return old_nht; 491 492 for (i = 0; i < (1 << old_nht->hash_shift); i++) { 493 struct neighbour *n, *next; 494 495 for (n = rcu_dereference_protected(old_nht->hash_buckets[i], 496 lockdep_is_held(&tbl->lock)); 497 n != NULL; 498 n = next) { 499 hash = tbl->hash(n->primary_key, n->dev, 500 new_nht->hash_rnd); 501 502 hash >>= (32 - new_nht->hash_shift); 503 next = rcu_dereference_protected(n->next, 504 lockdep_is_held(&tbl->lock)); 505 506 rcu_assign_pointer(n->next, 507 rcu_dereference_protected( 508 new_nht->hash_buckets[hash], 509 lockdep_is_held(&tbl->lock))); 510 rcu_assign_pointer(new_nht->hash_buckets[hash], n); 511 } 512 } 513 514 rcu_assign_pointer(tbl->nht, new_nht); 515 call_rcu(&old_nht->rcu, neigh_hash_free_rcu); 516 return new_nht; 517 } 518 519 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, 520 struct net_device *dev) 521 { 522 struct neighbour *n; 523 524 NEIGH_CACHE_STAT_INC(tbl, lookups); 525 526 rcu_read_lock_bh(); 527 n = __neigh_lookup_noref(tbl, pkey, dev); 528 if (n) { 529 if (!refcount_inc_not_zero(&n->refcnt)) 530 n = NULL; 531 NEIGH_CACHE_STAT_INC(tbl, hits); 532 } 533 534 rcu_read_unlock_bh(); 535 return n; 536 } 537 EXPORT_SYMBOL(neigh_lookup); 538 539 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, 540 const void *pkey) 541 { 542 struct neighbour *n; 543 unsigned int key_len = tbl->key_len; 544 u32 hash_val; 545 struct neigh_hash_table *nht; 546 547 NEIGH_CACHE_STAT_INC(tbl, lookups); 548 549 rcu_read_lock_bh(); 550 nht = rcu_dereference_bh(tbl->nht); 551 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); 552 553 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); 554 n != NULL; 555 n = rcu_dereference_bh(n->next)) { 556 if (!memcmp(n->primary_key, pkey, key_len) && 557 net_eq(dev_net(n->dev), net)) { 558 if (!refcount_inc_not_zero(&n->refcnt)) 559 n = NULL; 560 NEIGH_CACHE_STAT_INC(tbl, hits); 561 break; 562 } 563 } 564 565 rcu_read_unlock_bh(); 566 return n; 567 } 568 EXPORT_SYMBOL(neigh_lookup_nodev); 569 570 static struct neighbour *___neigh_create(struct neigh_table *tbl, 571 const void *pkey, 572 struct net_device *dev, 573 bool exempt_from_gc, bool want_ref) 574 { 575 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); 576 u32 hash_val; 577 unsigned int key_len = tbl->key_len; 578 int error; 579 struct neigh_hash_table *nht; 580 581 if (!n) { 582 rc = ERR_PTR(-ENOBUFS); 583 goto out; 584 } 585 586 memcpy(n->primary_key, pkey, key_len); 587 n->dev = dev; 588 dev_hold(dev); 589 590 /* Protocol specific setup. */ 591 if (tbl->constructor && (error = tbl->constructor(n)) < 0) { 592 rc = ERR_PTR(error); 593 goto out_neigh_release; 594 } 595 596 if (dev->netdev_ops->ndo_neigh_construct) { 597 error = dev->netdev_ops->ndo_neigh_construct(dev, n); 598 if (error < 0) { 599 rc = ERR_PTR(error); 600 goto out_neigh_release; 601 } 602 } 603 604 /* Device specific setup. */ 605 if (n->parms->neigh_setup && 606 (error = n->parms->neigh_setup(n)) < 0) { 607 rc = ERR_PTR(error); 608 goto out_neigh_release; 609 } 610 611 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); 612 613 write_lock_bh(&tbl->lock); 614 nht = rcu_dereference_protected(tbl->nht, 615 lockdep_is_held(&tbl->lock)); 616 617 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) 618 nht = neigh_hash_grow(tbl, nht->hash_shift + 1); 619 620 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); 621 622 if (n->parms->dead) { 623 rc = ERR_PTR(-EINVAL); 624 goto out_tbl_unlock; 625 } 626 627 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], 628 lockdep_is_held(&tbl->lock)); 629 n1 != NULL; 630 n1 = rcu_dereference_protected(n1->next, 631 lockdep_is_held(&tbl->lock))) { 632 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { 633 if (want_ref) 634 neigh_hold(n1); 635 rc = n1; 636 goto out_tbl_unlock; 637 } 638 } 639 640 n->dead = 0; 641 if (!exempt_from_gc) 642 list_add_tail(&n->gc_list, &n->tbl->gc_list); 643 644 if (want_ref) 645 neigh_hold(n); 646 rcu_assign_pointer(n->next, 647 rcu_dereference_protected(nht->hash_buckets[hash_val], 648 lockdep_is_held(&tbl->lock))); 649 rcu_assign_pointer(nht->hash_buckets[hash_val], n); 650 write_unlock_bh(&tbl->lock); 651 neigh_dbg(2, "neigh %p is created\n", n); 652 rc = n; 653 out: 654 return rc; 655 out_tbl_unlock: 656 write_unlock_bh(&tbl->lock); 657 out_neigh_release: 658 neigh_release(n); 659 goto out; 660 } 661 662 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, 663 struct net_device *dev, bool want_ref) 664 { 665 return ___neigh_create(tbl, pkey, dev, false, want_ref); 666 } 667 EXPORT_SYMBOL(__neigh_create); 668 669 static u32 pneigh_hash(const void *pkey, unsigned int key_len) 670 { 671 u32 hash_val = *(u32 *)(pkey + key_len - 4); 672 hash_val ^= (hash_val >> 16); 673 hash_val ^= hash_val >> 8; 674 hash_val ^= hash_val >> 4; 675 hash_val &= PNEIGH_HASHMASK; 676 return hash_val; 677 } 678 679 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, 680 struct net *net, 681 const void *pkey, 682 unsigned int key_len, 683 struct net_device *dev) 684 { 685 while (n) { 686 if (!memcmp(n->key, pkey, key_len) && 687 net_eq(pneigh_net(n), net) && 688 (n->dev == dev || !n->dev)) 689 return n; 690 n = n->next; 691 } 692 return NULL; 693 } 694 695 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, 696 struct net *net, const void *pkey, struct net_device *dev) 697 { 698 unsigned int key_len = tbl->key_len; 699 u32 hash_val = pneigh_hash(pkey, key_len); 700 701 return __pneigh_lookup_1(tbl->phash_buckets[hash_val], 702 net, pkey, key_len, dev); 703 } 704 EXPORT_SYMBOL_GPL(__pneigh_lookup); 705 706 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, 707 struct net *net, const void *pkey, 708 struct net_device *dev, int creat) 709 { 710 struct pneigh_entry *n; 711 unsigned int key_len = tbl->key_len; 712 u32 hash_val = pneigh_hash(pkey, key_len); 713 714 read_lock_bh(&tbl->lock); 715 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val], 716 net, pkey, key_len, dev); 717 read_unlock_bh(&tbl->lock); 718 719 if (n || !creat) 720 goto out; 721 722 ASSERT_RTNL(); 723 724 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); 725 if (!n) 726 goto out; 727 728 write_pnet(&n->net, net); 729 memcpy(n->key, pkey, key_len); 730 n->dev = dev; 731 if (dev) 732 dev_hold(dev); 733 734 if (tbl->pconstructor && tbl->pconstructor(n)) { 735 if (dev) 736 dev_put(dev); 737 kfree(n); 738 n = NULL; 739 goto out; 740 } 741 742 write_lock_bh(&tbl->lock); 743 n->next = tbl->phash_buckets[hash_val]; 744 tbl->phash_buckets[hash_val] = n; 745 write_unlock_bh(&tbl->lock); 746 out: 747 return n; 748 } 749 EXPORT_SYMBOL(pneigh_lookup); 750 751 752 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, 753 struct net_device *dev) 754 { 755 struct pneigh_entry *n, **np; 756 unsigned int key_len = tbl->key_len; 757 u32 hash_val = pneigh_hash(pkey, key_len); 758 759 write_lock_bh(&tbl->lock); 760 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; 761 np = &n->next) { 762 if (!memcmp(n->key, pkey, key_len) && n->dev == dev && 763 net_eq(pneigh_net(n), net)) { 764 *np = n->next; 765 write_unlock_bh(&tbl->lock); 766 if (tbl->pdestructor) 767 tbl->pdestructor(n); 768 if (n->dev) 769 dev_put(n->dev); 770 kfree(n); 771 return 0; 772 } 773 } 774 write_unlock_bh(&tbl->lock); 775 return -ENOENT; 776 } 777 778 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, 779 struct net_device *dev) 780 { 781 struct pneigh_entry *n, **np, *freelist = NULL; 782 u32 h; 783 784 for (h = 0; h <= PNEIGH_HASHMASK; h++) { 785 np = &tbl->phash_buckets[h]; 786 while ((n = *np) != NULL) { 787 if (!dev || n->dev == dev) { 788 *np = n->next; 789 n->next = freelist; 790 freelist = n; 791 continue; 792 } 793 np = &n->next; 794 } 795 } 796 write_unlock_bh(&tbl->lock); 797 while ((n = freelist)) { 798 freelist = n->next; 799 n->next = NULL; 800 if (tbl->pdestructor) 801 tbl->pdestructor(n); 802 if (n->dev) 803 dev_put(n->dev); 804 kfree(n); 805 } 806 return -ENOENT; 807 } 808 809 static void neigh_parms_destroy(struct neigh_parms *parms); 810 811 static inline void neigh_parms_put(struct neigh_parms *parms) 812 { 813 if (refcount_dec_and_test(&parms->refcnt)) 814 neigh_parms_destroy(parms); 815 } 816 817 /* 818 * neighbour must already be out of the table; 819 * 820 */ 821 void neigh_destroy(struct neighbour *neigh) 822 { 823 struct net_device *dev = neigh->dev; 824 825 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); 826 827 if (!neigh->dead) { 828 pr_warn("Destroying alive neighbour %p\n", neigh); 829 dump_stack(); 830 return; 831 } 832 833 if (neigh_del_timer(neigh)) 834 pr_warn("Impossible event\n"); 835 836 write_lock_bh(&neigh->lock); 837 __skb_queue_purge(&neigh->arp_queue); 838 write_unlock_bh(&neigh->lock); 839 neigh->arp_queue_len_bytes = 0; 840 841 if (dev->netdev_ops->ndo_neigh_destroy) 842 dev->netdev_ops->ndo_neigh_destroy(dev, neigh); 843 844 dev_put(dev); 845 neigh_parms_put(neigh->parms); 846 847 neigh_dbg(2, "neigh %p is destroyed\n", neigh); 848 849 atomic_dec(&neigh->tbl->entries); 850 kfree_rcu(neigh, rcu); 851 } 852 EXPORT_SYMBOL(neigh_destroy); 853 854 /* Neighbour state is suspicious; 855 disable fast path. 856 857 Called with write_locked neigh. 858 */ 859 static void neigh_suspect(struct neighbour *neigh) 860 { 861 neigh_dbg(2, "neigh %p is suspected\n", neigh); 862 863 neigh->output = neigh->ops->output; 864 } 865 866 /* Neighbour state is OK; 867 enable fast path. 868 869 Called with write_locked neigh. 870 */ 871 static void neigh_connect(struct neighbour *neigh) 872 { 873 neigh_dbg(2, "neigh %p is connected\n", neigh); 874 875 neigh->output = neigh->ops->connected_output; 876 } 877 878 static void neigh_periodic_work(struct work_struct *work) 879 { 880 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); 881 struct neighbour *n; 882 struct neighbour __rcu **np; 883 unsigned int i; 884 struct neigh_hash_table *nht; 885 886 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); 887 888 write_lock_bh(&tbl->lock); 889 nht = rcu_dereference_protected(tbl->nht, 890 lockdep_is_held(&tbl->lock)); 891 892 /* 893 * periodically recompute ReachableTime from random function 894 */ 895 896 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { 897 struct neigh_parms *p; 898 tbl->last_rand = jiffies; 899 list_for_each_entry(p, &tbl->parms_list, list) 900 p->reachable_time = 901 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 902 } 903 904 if (atomic_read(&tbl->entries) < tbl->gc_thresh1) 905 goto out; 906 907 for (i = 0 ; i < (1 << nht->hash_shift); i++) { 908 np = &nht->hash_buckets[i]; 909 910 while ((n = rcu_dereference_protected(*np, 911 lockdep_is_held(&tbl->lock))) != NULL) { 912 unsigned int state; 913 914 write_lock(&n->lock); 915 916 state = n->nud_state; 917 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || 918 (n->flags & NTF_EXT_LEARNED)) { 919 write_unlock(&n->lock); 920 goto next_elt; 921 } 922 923 if (time_before(n->used, n->confirmed)) 924 n->used = n->confirmed; 925 926 if (refcount_read(&n->refcnt) == 1 && 927 (state == NUD_FAILED || 928 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { 929 *np = n->next; 930 neigh_mark_dead(n); 931 write_unlock(&n->lock); 932 neigh_cleanup_and_release(n); 933 continue; 934 } 935 write_unlock(&n->lock); 936 937 next_elt: 938 np = &n->next; 939 } 940 /* 941 * It's fine to release lock here, even if hash table 942 * grows while we are preempted. 943 */ 944 write_unlock_bh(&tbl->lock); 945 cond_resched(); 946 write_lock_bh(&tbl->lock); 947 nht = rcu_dereference_protected(tbl->nht, 948 lockdep_is_held(&tbl->lock)); 949 } 950 out: 951 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. 952 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 953 * BASE_REACHABLE_TIME. 954 */ 955 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, 956 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); 957 write_unlock_bh(&tbl->lock); 958 } 959 960 static __inline__ int neigh_max_probes(struct neighbour *n) 961 { 962 struct neigh_parms *p = n->parms; 963 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + 964 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : 965 NEIGH_VAR(p, MCAST_PROBES)); 966 } 967 968 static void neigh_invalidate(struct neighbour *neigh) 969 __releases(neigh->lock) 970 __acquires(neigh->lock) 971 { 972 struct sk_buff *skb; 973 974 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); 975 neigh_dbg(2, "neigh %p is failed\n", neigh); 976 neigh->updated = jiffies; 977 978 /* It is very thin place. report_unreachable is very complicated 979 routine. Particularly, it can hit the same neighbour entry! 980 981 So that, we try to be accurate and avoid dead loop. --ANK 982 */ 983 while (neigh->nud_state == NUD_FAILED && 984 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { 985 write_unlock(&neigh->lock); 986 neigh->ops->error_report(neigh, skb); 987 write_lock(&neigh->lock); 988 } 989 __skb_queue_purge(&neigh->arp_queue); 990 neigh->arp_queue_len_bytes = 0; 991 } 992 993 static void neigh_probe(struct neighbour *neigh) 994 __releases(neigh->lock) 995 { 996 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); 997 /* keep skb alive even if arp_queue overflows */ 998 if (skb) 999 skb = skb_clone(skb, GFP_ATOMIC); 1000 write_unlock(&neigh->lock); 1001 if (neigh->ops->solicit) 1002 neigh->ops->solicit(neigh, skb); 1003 atomic_inc(&neigh->probes); 1004 kfree_skb(skb); 1005 } 1006 1007 /* Called when a timer expires for a neighbour entry. */ 1008 1009 static void neigh_timer_handler(struct timer_list *t) 1010 { 1011 unsigned long now, next; 1012 struct neighbour *neigh = from_timer(neigh, t, timer); 1013 unsigned int state; 1014 int notify = 0; 1015 1016 write_lock(&neigh->lock); 1017 1018 state = neigh->nud_state; 1019 now = jiffies; 1020 next = now + HZ; 1021 1022 if (!(state & NUD_IN_TIMER)) 1023 goto out; 1024 1025 if (state & NUD_REACHABLE) { 1026 if (time_before_eq(now, 1027 neigh->confirmed + neigh->parms->reachable_time)) { 1028 neigh_dbg(2, "neigh %p is still alive\n", neigh); 1029 next = neigh->confirmed + neigh->parms->reachable_time; 1030 } else if (time_before_eq(now, 1031 neigh->used + 1032 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { 1033 neigh_dbg(2, "neigh %p is delayed\n", neigh); 1034 neigh->nud_state = NUD_DELAY; 1035 neigh->updated = jiffies; 1036 neigh_suspect(neigh); 1037 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); 1038 } else { 1039 neigh_dbg(2, "neigh %p is suspected\n", neigh); 1040 neigh->nud_state = NUD_STALE; 1041 neigh->updated = jiffies; 1042 neigh_suspect(neigh); 1043 notify = 1; 1044 } 1045 } else if (state & NUD_DELAY) { 1046 if (time_before_eq(now, 1047 neigh->confirmed + 1048 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { 1049 neigh_dbg(2, "neigh %p is now reachable\n", neigh); 1050 neigh->nud_state = NUD_REACHABLE; 1051 neigh->updated = jiffies; 1052 neigh_connect(neigh); 1053 notify = 1; 1054 next = neigh->confirmed + neigh->parms->reachable_time; 1055 } else { 1056 neigh_dbg(2, "neigh %p is probed\n", neigh); 1057 neigh->nud_state = NUD_PROBE; 1058 neigh->updated = jiffies; 1059 atomic_set(&neigh->probes, 0); 1060 notify = 1; 1061 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); 1062 } 1063 } else { 1064 /* NUD_PROBE|NUD_INCOMPLETE */ 1065 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); 1066 } 1067 1068 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && 1069 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { 1070 neigh->nud_state = NUD_FAILED; 1071 notify = 1; 1072 neigh_invalidate(neigh); 1073 goto out; 1074 } 1075 1076 if (neigh->nud_state & NUD_IN_TIMER) { 1077 if (time_before(next, jiffies + HZ/2)) 1078 next = jiffies + HZ/2; 1079 if (!mod_timer(&neigh->timer, next)) 1080 neigh_hold(neigh); 1081 } 1082 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { 1083 neigh_probe(neigh); 1084 } else { 1085 out: 1086 write_unlock(&neigh->lock); 1087 } 1088 1089 if (notify) 1090 neigh_update_notify(neigh, 0); 1091 1092 neigh_release(neigh); 1093 } 1094 1095 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) 1096 { 1097 int rc; 1098 bool immediate_probe = false; 1099 1100 write_lock_bh(&neigh->lock); 1101 1102 rc = 0; 1103 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) 1104 goto out_unlock_bh; 1105 if (neigh->dead) 1106 goto out_dead; 1107 1108 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { 1109 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + 1110 NEIGH_VAR(neigh->parms, APP_PROBES)) { 1111 unsigned long next, now = jiffies; 1112 1113 atomic_set(&neigh->probes, 1114 NEIGH_VAR(neigh->parms, UCAST_PROBES)); 1115 neigh->nud_state = NUD_INCOMPLETE; 1116 neigh->updated = now; 1117 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), 1118 HZ/2); 1119 neigh_add_timer(neigh, next); 1120 immediate_probe = true; 1121 } else { 1122 neigh->nud_state = NUD_FAILED; 1123 neigh->updated = jiffies; 1124 write_unlock_bh(&neigh->lock); 1125 1126 kfree_skb(skb); 1127 return 1; 1128 } 1129 } else if (neigh->nud_state & NUD_STALE) { 1130 neigh_dbg(2, "neigh %p is delayed\n", neigh); 1131 neigh->nud_state = NUD_DELAY; 1132 neigh->updated = jiffies; 1133 neigh_add_timer(neigh, jiffies + 1134 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); 1135 } 1136 1137 if (neigh->nud_state == NUD_INCOMPLETE) { 1138 if (skb) { 1139 while (neigh->arp_queue_len_bytes + skb->truesize > 1140 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { 1141 struct sk_buff *buff; 1142 1143 buff = __skb_dequeue(&neigh->arp_queue); 1144 if (!buff) 1145 break; 1146 neigh->arp_queue_len_bytes -= buff->truesize; 1147 kfree_skb(buff); 1148 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); 1149 } 1150 skb_dst_force(skb); 1151 __skb_queue_tail(&neigh->arp_queue, skb); 1152 neigh->arp_queue_len_bytes += skb->truesize; 1153 } 1154 rc = 1; 1155 } 1156 out_unlock_bh: 1157 if (immediate_probe) 1158 neigh_probe(neigh); 1159 else 1160 write_unlock(&neigh->lock); 1161 local_bh_enable(); 1162 return rc; 1163 1164 out_dead: 1165 if (neigh->nud_state & NUD_STALE) 1166 goto out_unlock_bh; 1167 write_unlock_bh(&neigh->lock); 1168 kfree_skb(skb); 1169 return 1; 1170 } 1171 EXPORT_SYMBOL(__neigh_event_send); 1172 1173 static void neigh_update_hhs(struct neighbour *neigh) 1174 { 1175 struct hh_cache *hh; 1176 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 1177 = NULL; 1178 1179 if (neigh->dev->header_ops) 1180 update = neigh->dev->header_ops->cache_update; 1181 1182 if (update) { 1183 hh = &neigh->hh; 1184 if (hh->hh_len) { 1185 write_seqlock_bh(&hh->hh_lock); 1186 update(hh, neigh->dev, neigh->ha); 1187 write_sequnlock_bh(&hh->hh_lock); 1188 } 1189 } 1190 } 1191 1192 1193 1194 /* Generic update routine. 1195 -- lladdr is new lladdr or NULL, if it is not supplied. 1196 -- new is new state. 1197 -- flags 1198 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, 1199 if it is different. 1200 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" 1201 lladdr instead of overriding it 1202 if it is different. 1203 NEIGH_UPDATE_F_ADMIN means that the change is administrative. 1204 1205 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing 1206 NTF_ROUTER flag. 1207 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as 1208 a router. 1209 1210 Caller MUST hold reference count on the entry. 1211 */ 1212 1213 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, 1214 u8 new, u32 flags, u32 nlmsg_pid, 1215 struct netlink_ext_ack *extack) 1216 { 1217 bool ext_learn_change = false; 1218 u8 old; 1219 int err; 1220 int notify = 0; 1221 struct net_device *dev; 1222 int update_isrouter = 0; 1223 1224 write_lock_bh(&neigh->lock); 1225 1226 dev = neigh->dev; 1227 old = neigh->nud_state; 1228 err = -EPERM; 1229 1230 if (!(flags & NEIGH_UPDATE_F_ADMIN) && 1231 (old & (NUD_NOARP | NUD_PERMANENT))) 1232 goto out; 1233 if (neigh->dead) { 1234 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); 1235 goto out; 1236 } 1237 1238 ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); 1239 1240 if (!(new & NUD_VALID)) { 1241 neigh_del_timer(neigh); 1242 if (old & NUD_CONNECTED) 1243 neigh_suspect(neigh); 1244 neigh->nud_state = new; 1245 err = 0; 1246 notify = old & NUD_VALID; 1247 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && 1248 (new & NUD_FAILED)) { 1249 neigh_invalidate(neigh); 1250 notify = 1; 1251 } 1252 goto out; 1253 } 1254 1255 /* Compare new lladdr with cached one */ 1256 if (!dev->addr_len) { 1257 /* First case: device needs no address. */ 1258 lladdr = neigh->ha; 1259 } else if (lladdr) { 1260 /* The second case: if something is already cached 1261 and a new address is proposed: 1262 - compare new & old 1263 - if they are different, check override flag 1264 */ 1265 if ((old & NUD_VALID) && 1266 !memcmp(lladdr, neigh->ha, dev->addr_len)) 1267 lladdr = neigh->ha; 1268 } else { 1269 /* No address is supplied; if we know something, 1270 use it, otherwise discard the request. 1271 */ 1272 err = -EINVAL; 1273 if (!(old & NUD_VALID)) { 1274 NL_SET_ERR_MSG(extack, "No link layer address given"); 1275 goto out; 1276 } 1277 lladdr = neigh->ha; 1278 } 1279 1280 /* Update confirmed timestamp for neighbour entry after we 1281 * received ARP packet even if it doesn't change IP to MAC binding. 1282 */ 1283 if (new & NUD_CONNECTED) 1284 neigh->confirmed = jiffies; 1285 1286 /* If entry was valid and address is not changed, 1287 do not change entry state, if new one is STALE. 1288 */ 1289 err = 0; 1290 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; 1291 if (old & NUD_VALID) { 1292 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { 1293 update_isrouter = 0; 1294 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && 1295 (old & NUD_CONNECTED)) { 1296 lladdr = neigh->ha; 1297 new = NUD_STALE; 1298 } else 1299 goto out; 1300 } else { 1301 if (lladdr == neigh->ha && new == NUD_STALE && 1302 !(flags & NEIGH_UPDATE_F_ADMIN)) 1303 new = old; 1304 } 1305 } 1306 1307 /* Update timestamp only once we know we will make a change to the 1308 * neighbour entry. Otherwise we risk to move the locktime window with 1309 * noop updates and ignore relevant ARP updates. 1310 */ 1311 if (new != old || lladdr != neigh->ha) 1312 neigh->updated = jiffies; 1313 1314 if (new != old) { 1315 neigh_del_timer(neigh); 1316 if (new & NUD_PROBE) 1317 atomic_set(&neigh->probes, 0); 1318 if (new & NUD_IN_TIMER) 1319 neigh_add_timer(neigh, (jiffies + 1320 ((new & NUD_REACHABLE) ? 1321 neigh->parms->reachable_time : 1322 0))); 1323 neigh->nud_state = new; 1324 notify = 1; 1325 } 1326 1327 if (lladdr != neigh->ha) { 1328 write_seqlock(&neigh->ha_lock); 1329 memcpy(&neigh->ha, lladdr, dev->addr_len); 1330 write_sequnlock(&neigh->ha_lock); 1331 neigh_update_hhs(neigh); 1332 if (!(new & NUD_CONNECTED)) 1333 neigh->confirmed = jiffies - 1334 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); 1335 notify = 1; 1336 } 1337 if (new == old) 1338 goto out; 1339 if (new & NUD_CONNECTED) 1340 neigh_connect(neigh); 1341 else 1342 neigh_suspect(neigh); 1343 if (!(old & NUD_VALID)) { 1344 struct sk_buff *skb; 1345 1346 /* Again: avoid dead loop if something went wrong */ 1347 1348 while (neigh->nud_state & NUD_VALID && 1349 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { 1350 struct dst_entry *dst = skb_dst(skb); 1351 struct neighbour *n2, *n1 = neigh; 1352 write_unlock_bh(&neigh->lock); 1353 1354 rcu_read_lock(); 1355 1356 /* Why not just use 'neigh' as-is? The problem is that 1357 * things such as shaper, eql, and sch_teql can end up 1358 * using alternative, different, neigh objects to output 1359 * the packet in the output path. So what we need to do 1360 * here is re-lookup the top-level neigh in the path so 1361 * we can reinject the packet there. 1362 */ 1363 n2 = NULL; 1364 if (dst) { 1365 n2 = dst_neigh_lookup_skb(dst, skb); 1366 if (n2) 1367 n1 = n2; 1368 } 1369 n1->output(n1, skb); 1370 if (n2) 1371 neigh_release(n2); 1372 rcu_read_unlock(); 1373 1374 write_lock_bh(&neigh->lock); 1375 } 1376 __skb_queue_purge(&neigh->arp_queue); 1377 neigh->arp_queue_len_bytes = 0; 1378 } 1379 out: 1380 if (update_isrouter) 1381 neigh_update_is_router(neigh, flags, ¬ify); 1382 write_unlock_bh(&neigh->lock); 1383 1384 if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) 1385 neigh_update_gc_list(neigh); 1386 1387 if (notify) 1388 neigh_update_notify(neigh, nlmsg_pid); 1389 1390 return err; 1391 } 1392 1393 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, 1394 u32 flags, u32 nlmsg_pid) 1395 { 1396 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL); 1397 } 1398 EXPORT_SYMBOL(neigh_update); 1399 1400 /* Update the neigh to listen temporarily for probe responses, even if it is 1401 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. 1402 */ 1403 void __neigh_set_probe_once(struct neighbour *neigh) 1404 { 1405 if (neigh->dead) 1406 return; 1407 neigh->updated = jiffies; 1408 if (!(neigh->nud_state & NUD_FAILED)) 1409 return; 1410 neigh->nud_state = NUD_INCOMPLETE; 1411 atomic_set(&neigh->probes, neigh_max_probes(neigh)); 1412 neigh_add_timer(neigh, 1413 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); 1414 } 1415 EXPORT_SYMBOL(__neigh_set_probe_once); 1416 1417 struct neighbour *neigh_event_ns(struct neigh_table *tbl, 1418 u8 *lladdr, void *saddr, 1419 struct net_device *dev) 1420 { 1421 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, 1422 lladdr || !dev->addr_len); 1423 if (neigh) 1424 neigh_update(neigh, lladdr, NUD_STALE, 1425 NEIGH_UPDATE_F_OVERRIDE, 0); 1426 return neigh; 1427 } 1428 EXPORT_SYMBOL(neigh_event_ns); 1429 1430 /* called with read_lock_bh(&n->lock); */ 1431 static void neigh_hh_init(struct neighbour *n) 1432 { 1433 struct net_device *dev = n->dev; 1434 __be16 prot = n->tbl->protocol; 1435 struct hh_cache *hh = &n->hh; 1436 1437 write_lock_bh(&n->lock); 1438 1439 /* Only one thread can come in here and initialize the 1440 * hh_cache entry. 1441 */ 1442 if (!hh->hh_len) 1443 dev->header_ops->cache(n, hh, prot); 1444 1445 write_unlock_bh(&n->lock); 1446 } 1447 1448 /* Slow and careful. */ 1449 1450 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) 1451 { 1452 int rc = 0; 1453 1454 if (!neigh_event_send(neigh, skb)) { 1455 int err; 1456 struct net_device *dev = neigh->dev; 1457 unsigned int seq; 1458 1459 if (dev->header_ops->cache && !neigh->hh.hh_len) 1460 neigh_hh_init(neigh); 1461 1462 do { 1463 __skb_pull(skb, skb_network_offset(skb)); 1464 seq = read_seqbegin(&neigh->ha_lock); 1465 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1466 neigh->ha, NULL, skb->len); 1467 } while (read_seqretry(&neigh->ha_lock, seq)); 1468 1469 if (err >= 0) 1470 rc = dev_queue_xmit(skb); 1471 else 1472 goto out_kfree_skb; 1473 } 1474 out: 1475 return rc; 1476 out_kfree_skb: 1477 rc = -EINVAL; 1478 kfree_skb(skb); 1479 goto out; 1480 } 1481 EXPORT_SYMBOL(neigh_resolve_output); 1482 1483 /* As fast as possible without hh cache */ 1484 1485 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) 1486 { 1487 struct net_device *dev = neigh->dev; 1488 unsigned int seq; 1489 int err; 1490 1491 do { 1492 __skb_pull(skb, skb_network_offset(skb)); 1493 seq = read_seqbegin(&neigh->ha_lock); 1494 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1495 neigh->ha, NULL, skb->len); 1496 } while (read_seqretry(&neigh->ha_lock, seq)); 1497 1498 if (err >= 0) 1499 err = dev_queue_xmit(skb); 1500 else { 1501 err = -EINVAL; 1502 kfree_skb(skb); 1503 } 1504 return err; 1505 } 1506 EXPORT_SYMBOL(neigh_connected_output); 1507 1508 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) 1509 { 1510 return dev_queue_xmit(skb); 1511 } 1512 EXPORT_SYMBOL(neigh_direct_output); 1513 1514 static void neigh_proxy_process(struct timer_list *t) 1515 { 1516 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); 1517 long sched_next = 0; 1518 unsigned long now = jiffies; 1519 struct sk_buff *skb, *n; 1520 1521 spin_lock(&tbl->proxy_queue.lock); 1522 1523 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { 1524 long tdif = NEIGH_CB(skb)->sched_next - now; 1525 1526 if (tdif <= 0) { 1527 struct net_device *dev = skb->dev; 1528 1529 __skb_unlink(skb, &tbl->proxy_queue); 1530 if (tbl->proxy_redo && netif_running(dev)) { 1531 rcu_read_lock(); 1532 tbl->proxy_redo(skb); 1533 rcu_read_unlock(); 1534 } else { 1535 kfree_skb(skb); 1536 } 1537 1538 dev_put(dev); 1539 } else if (!sched_next || tdif < sched_next) 1540 sched_next = tdif; 1541 } 1542 del_timer(&tbl->proxy_timer); 1543 if (sched_next) 1544 mod_timer(&tbl->proxy_timer, jiffies + sched_next); 1545 spin_unlock(&tbl->proxy_queue.lock); 1546 } 1547 1548 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, 1549 struct sk_buff *skb) 1550 { 1551 unsigned long now = jiffies; 1552 1553 unsigned long sched_next = now + (prandom_u32() % 1554 NEIGH_VAR(p, PROXY_DELAY)); 1555 1556 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { 1557 kfree_skb(skb); 1558 return; 1559 } 1560 1561 NEIGH_CB(skb)->sched_next = sched_next; 1562 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; 1563 1564 spin_lock(&tbl->proxy_queue.lock); 1565 if (del_timer(&tbl->proxy_timer)) { 1566 if (time_before(tbl->proxy_timer.expires, sched_next)) 1567 sched_next = tbl->proxy_timer.expires; 1568 } 1569 skb_dst_drop(skb); 1570 dev_hold(skb->dev); 1571 __skb_queue_tail(&tbl->proxy_queue, skb); 1572 mod_timer(&tbl->proxy_timer, sched_next); 1573 spin_unlock(&tbl->proxy_queue.lock); 1574 } 1575 EXPORT_SYMBOL(pneigh_enqueue); 1576 1577 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, 1578 struct net *net, int ifindex) 1579 { 1580 struct neigh_parms *p; 1581 1582 list_for_each_entry(p, &tbl->parms_list, list) { 1583 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || 1584 (!p->dev && !ifindex && net_eq(net, &init_net))) 1585 return p; 1586 } 1587 1588 return NULL; 1589 } 1590 1591 struct neigh_parms *neigh_parms_alloc(struct net_device *dev, 1592 struct neigh_table *tbl) 1593 { 1594 struct neigh_parms *p; 1595 struct net *net = dev_net(dev); 1596 const struct net_device_ops *ops = dev->netdev_ops; 1597 1598 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); 1599 if (p) { 1600 p->tbl = tbl; 1601 refcount_set(&p->refcnt, 1); 1602 p->reachable_time = 1603 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 1604 dev_hold(dev); 1605 p->dev = dev; 1606 write_pnet(&p->net, net); 1607 p->sysctl_table = NULL; 1608 1609 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { 1610 dev_put(dev); 1611 kfree(p); 1612 return NULL; 1613 } 1614 1615 write_lock_bh(&tbl->lock); 1616 list_add(&p->list, &tbl->parms.list); 1617 write_unlock_bh(&tbl->lock); 1618 1619 neigh_parms_data_state_cleanall(p); 1620 } 1621 return p; 1622 } 1623 EXPORT_SYMBOL(neigh_parms_alloc); 1624 1625 static void neigh_rcu_free_parms(struct rcu_head *head) 1626 { 1627 struct neigh_parms *parms = 1628 container_of(head, struct neigh_parms, rcu_head); 1629 1630 neigh_parms_put(parms); 1631 } 1632 1633 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) 1634 { 1635 if (!parms || parms == &tbl->parms) 1636 return; 1637 write_lock_bh(&tbl->lock); 1638 list_del(&parms->list); 1639 parms->dead = 1; 1640 write_unlock_bh(&tbl->lock); 1641 if (parms->dev) 1642 dev_put(parms->dev); 1643 call_rcu(&parms->rcu_head, neigh_rcu_free_parms); 1644 } 1645 EXPORT_SYMBOL(neigh_parms_release); 1646 1647 static void neigh_parms_destroy(struct neigh_parms *parms) 1648 { 1649 kfree(parms); 1650 } 1651 1652 static struct lock_class_key neigh_table_proxy_queue_class; 1653 1654 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly; 1655 1656 void neigh_table_init(int index, struct neigh_table *tbl) 1657 { 1658 unsigned long now = jiffies; 1659 unsigned long phsize; 1660 1661 INIT_LIST_HEAD(&tbl->parms_list); 1662 INIT_LIST_HEAD(&tbl->gc_list); 1663 list_add(&tbl->parms.list, &tbl->parms_list); 1664 write_pnet(&tbl->parms.net, &init_net); 1665 refcount_set(&tbl->parms.refcnt, 1); 1666 tbl->parms.reachable_time = 1667 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); 1668 1669 tbl->stats = alloc_percpu(struct neigh_statistics); 1670 if (!tbl->stats) 1671 panic("cannot create neighbour cache statistics"); 1672 1673 #ifdef CONFIG_PROC_FS 1674 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat, 1675 &neigh_stat_seq_ops, tbl)) 1676 panic("cannot create neighbour proc dir entry"); 1677 #endif 1678 1679 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3)); 1680 1681 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); 1682 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); 1683 1684 if (!tbl->nht || !tbl->phash_buckets) 1685 panic("cannot allocate neighbour cache hashes"); 1686 1687 if (!tbl->entry_size) 1688 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + 1689 tbl->key_len, NEIGH_PRIV_ALIGN); 1690 else 1691 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); 1692 1693 rwlock_init(&tbl->lock); 1694 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1695 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, 1696 tbl->parms.reachable_time); 1697 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); 1698 skb_queue_head_init_class(&tbl->proxy_queue, 1699 &neigh_table_proxy_queue_class); 1700 1701 tbl->last_flush = now; 1702 tbl->last_rand = now + tbl->parms.reachable_time * 20; 1703 1704 neigh_tables[index] = tbl; 1705 } 1706 EXPORT_SYMBOL(neigh_table_init); 1707 1708 int neigh_table_clear(int index, struct neigh_table *tbl) 1709 { 1710 neigh_tables[index] = NULL; 1711 /* It is not clean... Fix it to unload IPv6 module safely */ 1712 cancel_delayed_work_sync(&tbl->gc_work); 1713 del_timer_sync(&tbl->proxy_timer); 1714 pneigh_queue_purge(&tbl->proxy_queue); 1715 neigh_ifdown(tbl, NULL); 1716 if (atomic_read(&tbl->entries)) 1717 pr_crit("neighbour leakage\n"); 1718 1719 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, 1720 neigh_hash_free_rcu); 1721 tbl->nht = NULL; 1722 1723 kfree(tbl->phash_buckets); 1724 tbl->phash_buckets = NULL; 1725 1726 remove_proc_entry(tbl->id, init_net.proc_net_stat); 1727 1728 free_percpu(tbl->stats); 1729 tbl->stats = NULL; 1730 1731 return 0; 1732 } 1733 EXPORT_SYMBOL(neigh_table_clear); 1734 1735 static struct neigh_table *neigh_find_table(int family) 1736 { 1737 struct neigh_table *tbl = NULL; 1738 1739 switch (family) { 1740 case AF_INET: 1741 tbl = neigh_tables[NEIGH_ARP_TABLE]; 1742 break; 1743 case AF_INET6: 1744 tbl = neigh_tables[NEIGH_ND_TABLE]; 1745 break; 1746 case AF_DECnet: 1747 tbl = neigh_tables[NEIGH_DN_TABLE]; 1748 break; 1749 } 1750 1751 return tbl; 1752 } 1753 1754 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, 1755 struct netlink_ext_ack *extack) 1756 { 1757 struct net *net = sock_net(skb->sk); 1758 struct ndmsg *ndm; 1759 struct nlattr *dst_attr; 1760 struct neigh_table *tbl; 1761 struct neighbour *neigh; 1762 struct net_device *dev = NULL; 1763 int err = -EINVAL; 1764 1765 ASSERT_RTNL(); 1766 if (nlmsg_len(nlh) < sizeof(*ndm)) 1767 goto out; 1768 1769 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); 1770 if (!dst_attr) { 1771 NL_SET_ERR_MSG(extack, "Network address not specified"); 1772 goto out; 1773 } 1774 1775 ndm = nlmsg_data(nlh); 1776 if (ndm->ndm_ifindex) { 1777 dev = __dev_get_by_index(net, ndm->ndm_ifindex); 1778 if (dev == NULL) { 1779 err = -ENODEV; 1780 goto out; 1781 } 1782 } 1783 1784 tbl = neigh_find_table(ndm->ndm_family); 1785 if (tbl == NULL) 1786 return -EAFNOSUPPORT; 1787 1788 if (nla_len(dst_attr) < (int)tbl->key_len) { 1789 NL_SET_ERR_MSG(extack, "Invalid network address"); 1790 goto out; 1791 } 1792 1793 if (ndm->ndm_flags & NTF_PROXY) { 1794 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); 1795 goto out; 1796 } 1797 1798 if (dev == NULL) 1799 goto out; 1800 1801 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); 1802 if (neigh == NULL) { 1803 err = -ENOENT; 1804 goto out; 1805 } 1806 1807 err = __neigh_update(neigh, NULL, NUD_FAILED, 1808 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 1809 NETLINK_CB(skb).portid, extack); 1810 write_lock_bh(&tbl->lock); 1811 neigh_release(neigh); 1812 neigh_remove_one(neigh, tbl); 1813 write_unlock_bh(&tbl->lock); 1814 1815 out: 1816 return err; 1817 } 1818 1819 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, 1820 struct netlink_ext_ack *extack) 1821 { 1822 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | 1823 NEIGH_UPDATE_F_OVERRIDE_ISROUTER; 1824 struct net *net = sock_net(skb->sk); 1825 struct ndmsg *ndm; 1826 struct nlattr *tb[NDA_MAX+1]; 1827 struct neigh_table *tbl; 1828 struct net_device *dev = NULL; 1829 struct neighbour *neigh; 1830 void *dst, *lladdr; 1831 u8 protocol = 0; 1832 int err; 1833 1834 ASSERT_RTNL(); 1835 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); 1836 if (err < 0) 1837 goto out; 1838 1839 err = -EINVAL; 1840 if (!tb[NDA_DST]) { 1841 NL_SET_ERR_MSG(extack, "Network address not specified"); 1842 goto out; 1843 } 1844 1845 ndm = nlmsg_data(nlh); 1846 if (ndm->ndm_ifindex) { 1847 dev = __dev_get_by_index(net, ndm->ndm_ifindex); 1848 if (dev == NULL) { 1849 err = -ENODEV; 1850 goto out; 1851 } 1852 1853 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) { 1854 NL_SET_ERR_MSG(extack, "Invalid link address"); 1855 goto out; 1856 } 1857 } 1858 1859 tbl = neigh_find_table(ndm->ndm_family); 1860 if (tbl == NULL) 1861 return -EAFNOSUPPORT; 1862 1863 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) { 1864 NL_SET_ERR_MSG(extack, "Invalid network address"); 1865 goto out; 1866 } 1867 1868 dst = nla_data(tb[NDA_DST]); 1869 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; 1870 1871 if (tb[NDA_PROTOCOL]) { 1872 if (nla_len(tb[NDA_PROTOCOL]) != sizeof(u8)) { 1873 NL_SET_ERR_MSG(extack, "Invalid protocol attribute"); 1874 goto out; 1875 } 1876 protocol = nla_get_u8(tb[NDA_PROTOCOL]); 1877 } 1878 1879 if (ndm->ndm_flags & NTF_PROXY) { 1880 struct pneigh_entry *pn; 1881 1882 err = -ENOBUFS; 1883 pn = pneigh_lookup(tbl, net, dst, dev, 1); 1884 if (pn) { 1885 pn->flags = ndm->ndm_flags; 1886 if (protocol) 1887 pn->protocol = protocol; 1888 err = 0; 1889 } 1890 goto out; 1891 } 1892 1893 if (!dev) { 1894 NL_SET_ERR_MSG(extack, "Device not specified"); 1895 goto out; 1896 } 1897 1898 neigh = neigh_lookup(tbl, dst, dev); 1899 if (neigh == NULL) { 1900 bool exempt_from_gc; 1901 1902 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { 1903 err = -ENOENT; 1904 goto out; 1905 } 1906 1907 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || 1908 ndm->ndm_flags & NTF_EXT_LEARNED; 1909 neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); 1910 if (IS_ERR(neigh)) { 1911 err = PTR_ERR(neigh); 1912 goto out; 1913 } 1914 } else { 1915 if (nlh->nlmsg_flags & NLM_F_EXCL) { 1916 err = -EEXIST; 1917 neigh_release(neigh); 1918 goto out; 1919 } 1920 1921 if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) 1922 flags &= ~(NEIGH_UPDATE_F_OVERRIDE | 1923 NEIGH_UPDATE_F_OVERRIDE_ISROUTER); 1924 } 1925 1926 if (ndm->ndm_flags & NTF_EXT_LEARNED) 1927 flags |= NEIGH_UPDATE_F_EXT_LEARNED; 1928 1929 if (ndm->ndm_flags & NTF_ROUTER) 1930 flags |= NEIGH_UPDATE_F_ISROUTER; 1931 1932 if (ndm->ndm_flags & NTF_USE) { 1933 neigh_event_send(neigh, NULL); 1934 err = 0; 1935 } else 1936 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, 1937 NETLINK_CB(skb).portid, extack); 1938 1939 if (protocol) 1940 neigh->protocol = protocol; 1941 1942 neigh_release(neigh); 1943 1944 out: 1945 return err; 1946 } 1947 1948 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) 1949 { 1950 struct nlattr *nest; 1951 1952 nest = nla_nest_start(skb, NDTA_PARMS); 1953 if (nest == NULL) 1954 return -ENOBUFS; 1955 1956 if ((parms->dev && 1957 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || 1958 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) || 1959 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, 1960 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || 1961 /* approximative value for deprecated QUEUE_LEN (in packets) */ 1962 nla_put_u32(skb, NDTPA_QUEUE_LEN, 1963 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || 1964 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || 1965 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || 1966 nla_put_u32(skb, NDTPA_UCAST_PROBES, 1967 NEIGH_VAR(parms, UCAST_PROBES)) || 1968 nla_put_u32(skb, NDTPA_MCAST_PROBES, 1969 NEIGH_VAR(parms, MCAST_PROBES)) || 1970 nla_put_u32(skb, NDTPA_MCAST_REPROBES, 1971 NEIGH_VAR(parms, MCAST_REPROBES)) || 1972 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time, 1973 NDTPA_PAD) || 1974 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, 1975 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) || 1976 nla_put_msecs(skb, NDTPA_GC_STALETIME, 1977 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) || 1978 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, 1979 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) || 1980 nla_put_msecs(skb, NDTPA_RETRANS_TIME, 1981 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) || 1982 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, 1983 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) || 1984 nla_put_msecs(skb, NDTPA_PROXY_DELAY, 1985 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || 1986 nla_put_msecs(skb, NDTPA_LOCKTIME, 1987 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD)) 1988 goto nla_put_failure; 1989 return nla_nest_end(skb, nest); 1990 1991 nla_put_failure: 1992 nla_nest_cancel(skb, nest); 1993 return -EMSGSIZE; 1994 } 1995 1996 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, 1997 u32 pid, u32 seq, int type, int flags) 1998 { 1999 struct nlmsghdr *nlh; 2000 struct ndtmsg *ndtmsg; 2001 2002 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); 2003 if (nlh == NULL) 2004 return -EMSGSIZE; 2005 2006 ndtmsg = nlmsg_data(nlh); 2007 2008 read_lock_bh(&tbl->lock); 2009 ndtmsg->ndtm_family = tbl->family; 2010 ndtmsg->ndtm_pad1 = 0; 2011 ndtmsg->ndtm_pad2 = 0; 2012 2013 if (nla_put_string(skb, NDTA_NAME, tbl->id) || 2014 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) || 2015 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || 2016 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || 2017 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) 2018 goto nla_put_failure; 2019 { 2020 unsigned long now = jiffies; 2021 unsigned int flush_delta = now - tbl->last_flush; 2022 unsigned int rand_delta = now - tbl->last_rand; 2023 struct neigh_hash_table *nht; 2024 struct ndt_config ndc = { 2025 .ndtc_key_len = tbl->key_len, 2026 .ndtc_entry_size = tbl->entry_size, 2027 .ndtc_entries = atomic_read(&tbl->entries), 2028 .ndtc_last_flush = jiffies_to_msecs(flush_delta), 2029 .ndtc_last_rand = jiffies_to_msecs(rand_delta), 2030 .ndtc_proxy_qlen = tbl->proxy_queue.qlen, 2031 }; 2032 2033 rcu_read_lock_bh(); 2034 nht = rcu_dereference_bh(tbl->nht); 2035 ndc.ndtc_hash_rnd = nht->hash_rnd[0]; 2036 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); 2037 rcu_read_unlock_bh(); 2038 2039 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) 2040 goto nla_put_failure; 2041 } 2042 2043 { 2044 int cpu; 2045 struct ndt_stats ndst; 2046 2047 memset(&ndst, 0, sizeof(ndst)); 2048 2049 for_each_possible_cpu(cpu) { 2050 struct neigh_statistics *st; 2051 2052 st = per_cpu_ptr(tbl->stats, cpu); 2053 ndst.ndts_allocs += st->allocs; 2054 ndst.ndts_destroys += st->destroys; 2055 ndst.ndts_hash_grows += st->hash_grows; 2056 ndst.ndts_res_failed += st->res_failed; 2057 ndst.ndts_lookups += st->lookups; 2058 ndst.ndts_hits += st->hits; 2059 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast; 2060 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; 2061 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; 2062 ndst.ndts_forced_gc_runs += st->forced_gc_runs; 2063 ndst.ndts_table_fulls += st->table_fulls; 2064 } 2065 2066 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, 2067 NDTA_PAD)) 2068 goto nla_put_failure; 2069 } 2070 2071 BUG_ON(tbl->parms.dev); 2072 if (neightbl_fill_parms(skb, &tbl->parms) < 0) 2073 goto nla_put_failure; 2074 2075 read_unlock_bh(&tbl->lock); 2076 nlmsg_end(skb, nlh); 2077 return 0; 2078 2079 nla_put_failure: 2080 read_unlock_bh(&tbl->lock); 2081 nlmsg_cancel(skb, nlh); 2082 return -EMSGSIZE; 2083 } 2084 2085 static int neightbl_fill_param_info(struct sk_buff *skb, 2086 struct neigh_table *tbl, 2087 struct neigh_parms *parms, 2088 u32 pid, u32 seq, int type, 2089 unsigned int flags) 2090 { 2091 struct ndtmsg *ndtmsg; 2092 struct nlmsghdr *nlh; 2093 2094 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); 2095 if (nlh == NULL) 2096 return -EMSGSIZE; 2097 2098 ndtmsg = nlmsg_data(nlh); 2099 2100 read_lock_bh(&tbl->lock); 2101 ndtmsg->ndtm_family = tbl->family; 2102 ndtmsg->ndtm_pad1 = 0; 2103 ndtmsg->ndtm_pad2 = 0; 2104 2105 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || 2106 neightbl_fill_parms(skb, parms) < 0) 2107 goto errout; 2108 2109 read_unlock_bh(&tbl->lock); 2110 nlmsg_end(skb, nlh); 2111 return 0; 2112 errout: 2113 read_unlock_bh(&tbl->lock); 2114 nlmsg_cancel(skb, nlh); 2115 return -EMSGSIZE; 2116 } 2117 2118 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { 2119 [NDTA_NAME] = { .type = NLA_STRING }, 2120 [NDTA_THRESH1] = { .type = NLA_U32 }, 2121 [NDTA_THRESH2] = { .type = NLA_U32 }, 2122 [NDTA_THRESH3] = { .type = NLA_U32 }, 2123 [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, 2124 [NDTA_PARMS] = { .type = NLA_NESTED }, 2125 }; 2126 2127 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { 2128 [NDTPA_IFINDEX] = { .type = NLA_U32 }, 2129 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, 2130 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, 2131 [NDTPA_APP_PROBES] = { .type = NLA_U32 }, 2132 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, 2133 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, 2134 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, 2135 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, 2136 [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, 2137 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, 2138 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, 2139 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, 2140 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, 2141 [NDTPA_LOCKTIME] = { .type = NLA_U64 }, 2142 }; 2143 2144 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, 2145 struct netlink_ext_ack *extack) 2146 { 2147 struct net *net = sock_net(skb->sk); 2148 struct neigh_table *tbl; 2149 struct ndtmsg *ndtmsg; 2150 struct nlattr *tb[NDTA_MAX+1]; 2151 bool found = false; 2152 int err, tidx; 2153 2154 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, 2155 nl_neightbl_policy, extack); 2156 if (err < 0) 2157 goto errout; 2158 2159 if (tb[NDTA_NAME] == NULL) { 2160 err = -EINVAL; 2161 goto errout; 2162 } 2163 2164 ndtmsg = nlmsg_data(nlh); 2165 2166 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { 2167 tbl = neigh_tables[tidx]; 2168 if (!tbl) 2169 continue; 2170 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) 2171 continue; 2172 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { 2173 found = true; 2174 break; 2175 } 2176 } 2177 2178 if (!found) 2179 return -ENOENT; 2180 2181 /* 2182 * We acquire tbl->lock to be nice to the periodic timers and 2183 * make sure they always see a consistent set of values. 2184 */ 2185 write_lock_bh(&tbl->lock); 2186 2187 if (tb[NDTA_PARMS]) { 2188 struct nlattr *tbp[NDTPA_MAX+1]; 2189 struct neigh_parms *p; 2190 int i, ifindex = 0; 2191 2192 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], 2193 nl_ntbl_parm_policy, extack); 2194 if (err < 0) 2195 goto errout_tbl_lock; 2196 2197 if (tbp[NDTPA_IFINDEX]) 2198 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); 2199 2200 p = lookup_neigh_parms(tbl, net, ifindex); 2201 if (p == NULL) { 2202 err = -ENOENT; 2203 goto errout_tbl_lock; 2204 } 2205 2206 for (i = 1; i <= NDTPA_MAX; i++) { 2207 if (tbp[i] == NULL) 2208 continue; 2209 2210 switch (i) { 2211 case NDTPA_QUEUE_LEN: 2212 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, 2213 nla_get_u32(tbp[i]) * 2214 SKB_TRUESIZE(ETH_FRAME_LEN)); 2215 break; 2216 case NDTPA_QUEUE_LENBYTES: 2217 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, 2218 nla_get_u32(tbp[i])); 2219 break; 2220 case NDTPA_PROXY_QLEN: 2221 NEIGH_VAR_SET(p, PROXY_QLEN, 2222 nla_get_u32(tbp[i])); 2223 break; 2224 case NDTPA_APP_PROBES: 2225 NEIGH_VAR_SET(p, APP_PROBES, 2226 nla_get_u32(tbp[i])); 2227 break; 2228 case NDTPA_UCAST_PROBES: 2229 NEIGH_VAR_SET(p, UCAST_PROBES, 2230 nla_get_u32(tbp[i])); 2231 break; 2232 case NDTPA_MCAST_PROBES: 2233 NEIGH_VAR_SET(p, MCAST_PROBES, 2234 nla_get_u32(tbp[i])); 2235 break; 2236 case NDTPA_MCAST_REPROBES: 2237 NEIGH_VAR_SET(p, MCAST_REPROBES, 2238 nla_get_u32(tbp[i])); 2239 break; 2240 case NDTPA_BASE_REACHABLE_TIME: 2241 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, 2242 nla_get_msecs(tbp[i])); 2243 /* update reachable_time as well, otherwise, the change will 2244 * only be effective after the next time neigh_periodic_work 2245 * decides to recompute it (can be multiple minutes) 2246 */ 2247 p->reachable_time = 2248 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 2249 break; 2250 case NDTPA_GC_STALETIME: 2251 NEIGH_VAR_SET(p, GC_STALETIME, 2252 nla_get_msecs(tbp[i])); 2253 break; 2254 case NDTPA_DELAY_PROBE_TIME: 2255 NEIGH_VAR_SET(p, DELAY_PROBE_TIME, 2256 nla_get_msecs(tbp[i])); 2257 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 2258 break; 2259 case NDTPA_RETRANS_TIME: 2260 NEIGH_VAR_SET(p, RETRANS_TIME, 2261 nla_get_msecs(tbp[i])); 2262 break; 2263 case NDTPA_ANYCAST_DELAY: 2264 NEIGH_VAR_SET(p, ANYCAST_DELAY, 2265 nla_get_msecs(tbp[i])); 2266 break; 2267 case NDTPA_PROXY_DELAY: 2268 NEIGH_VAR_SET(p, PROXY_DELAY, 2269 nla_get_msecs(tbp[i])); 2270 break; 2271 case NDTPA_LOCKTIME: 2272 NEIGH_VAR_SET(p, LOCKTIME, 2273 nla_get_msecs(tbp[i])); 2274 break; 2275 } 2276 } 2277 } 2278 2279 err = -ENOENT; 2280 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || 2281 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && 2282 !net_eq(net, &init_net)) 2283 goto errout_tbl_lock; 2284 2285 if (tb[NDTA_THRESH1]) 2286 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); 2287 2288 if (tb[NDTA_THRESH2]) 2289 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); 2290 2291 if (tb[NDTA_THRESH3]) 2292 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); 2293 2294 if (tb[NDTA_GC_INTERVAL]) 2295 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); 2296 2297 err = 0; 2298 2299 errout_tbl_lock: 2300 write_unlock_bh(&tbl->lock); 2301 errout: 2302 return err; 2303 } 2304 2305 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh, 2306 struct netlink_ext_ack *extack) 2307 { 2308 struct ndtmsg *ndtm; 2309 2310 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) { 2311 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request"); 2312 return -EINVAL; 2313 } 2314 2315 ndtm = nlmsg_data(nlh); 2316 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) { 2317 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request"); 2318 return -EINVAL; 2319 } 2320 2321 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) { 2322 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request"); 2323 return -EINVAL; 2324 } 2325 2326 return 0; 2327 } 2328 2329 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2330 { 2331 const struct nlmsghdr *nlh = cb->nlh; 2332 struct net *net = sock_net(skb->sk); 2333 int family, tidx, nidx = 0; 2334 int tbl_skip = cb->args[0]; 2335 int neigh_skip = cb->args[1]; 2336 struct neigh_table *tbl; 2337 2338 if (cb->strict_check) { 2339 int err = neightbl_valid_dump_info(nlh, cb->extack); 2340 2341 if (err < 0) 2342 return err; 2343 } 2344 2345 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2346 2347 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { 2348 struct neigh_parms *p; 2349 2350 tbl = neigh_tables[tidx]; 2351 if (!tbl) 2352 continue; 2353 2354 if (tidx < tbl_skip || (family && tbl->family != family)) 2355 continue; 2356 2357 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, 2358 nlh->nlmsg_seq, RTM_NEWNEIGHTBL, 2359 NLM_F_MULTI) < 0) 2360 break; 2361 2362 nidx = 0; 2363 p = list_next_entry(&tbl->parms, list); 2364 list_for_each_entry_from(p, &tbl->parms_list, list) { 2365 if (!net_eq(neigh_parms_net(p), net)) 2366 continue; 2367 2368 if (nidx < neigh_skip) 2369 goto next; 2370 2371 if (neightbl_fill_param_info(skb, tbl, p, 2372 NETLINK_CB(cb->skb).portid, 2373 nlh->nlmsg_seq, 2374 RTM_NEWNEIGHTBL, 2375 NLM_F_MULTI) < 0) 2376 goto out; 2377 next: 2378 nidx++; 2379 } 2380 2381 neigh_skip = 0; 2382 } 2383 out: 2384 cb->args[0] = tidx; 2385 cb->args[1] = nidx; 2386 2387 return skb->len; 2388 } 2389 2390 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, 2391 u32 pid, u32 seq, int type, unsigned int flags) 2392 { 2393 unsigned long now = jiffies; 2394 struct nda_cacheinfo ci; 2395 struct nlmsghdr *nlh; 2396 struct ndmsg *ndm; 2397 2398 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2399 if (nlh == NULL) 2400 return -EMSGSIZE; 2401 2402 ndm = nlmsg_data(nlh); 2403 ndm->ndm_family = neigh->ops->family; 2404 ndm->ndm_pad1 = 0; 2405 ndm->ndm_pad2 = 0; 2406 ndm->ndm_flags = neigh->flags; 2407 ndm->ndm_type = neigh->type; 2408 ndm->ndm_ifindex = neigh->dev->ifindex; 2409 2410 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) 2411 goto nla_put_failure; 2412 2413 read_lock_bh(&neigh->lock); 2414 ndm->ndm_state = neigh->nud_state; 2415 if (neigh->nud_state & NUD_VALID) { 2416 char haddr[MAX_ADDR_LEN]; 2417 2418 neigh_ha_snapshot(haddr, neigh, neigh->dev); 2419 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { 2420 read_unlock_bh(&neigh->lock); 2421 goto nla_put_failure; 2422 } 2423 } 2424 2425 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2426 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); 2427 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); 2428 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; 2429 read_unlock_bh(&neigh->lock); 2430 2431 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || 2432 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 2433 goto nla_put_failure; 2434 2435 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) 2436 goto nla_put_failure; 2437 2438 nlmsg_end(skb, nlh); 2439 return 0; 2440 2441 nla_put_failure: 2442 nlmsg_cancel(skb, nlh); 2443 return -EMSGSIZE; 2444 } 2445 2446 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, 2447 u32 pid, u32 seq, int type, unsigned int flags, 2448 struct neigh_table *tbl) 2449 { 2450 struct nlmsghdr *nlh; 2451 struct ndmsg *ndm; 2452 2453 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2454 if (nlh == NULL) 2455 return -EMSGSIZE; 2456 2457 ndm = nlmsg_data(nlh); 2458 ndm->ndm_family = tbl->family; 2459 ndm->ndm_pad1 = 0; 2460 ndm->ndm_pad2 = 0; 2461 ndm->ndm_flags = pn->flags | NTF_PROXY; 2462 ndm->ndm_type = RTN_UNICAST; 2463 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; 2464 ndm->ndm_state = NUD_NONE; 2465 2466 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) 2467 goto nla_put_failure; 2468 2469 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) 2470 goto nla_put_failure; 2471 2472 nlmsg_end(skb, nlh); 2473 return 0; 2474 2475 nla_put_failure: 2476 nlmsg_cancel(skb, nlh); 2477 return -EMSGSIZE; 2478 } 2479 2480 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) 2481 { 2482 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 2483 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); 2484 } 2485 2486 static bool neigh_master_filtered(struct net_device *dev, int master_idx) 2487 { 2488 struct net_device *master; 2489 2490 if (!master_idx) 2491 return false; 2492 2493 master = dev ? netdev_master_upper_dev_get(dev) : NULL; 2494 if (!master || master->ifindex != master_idx) 2495 return true; 2496 2497 return false; 2498 } 2499 2500 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) 2501 { 2502 if (filter_idx && (!dev || dev->ifindex != filter_idx)) 2503 return true; 2504 2505 return false; 2506 } 2507 2508 struct neigh_dump_filter { 2509 int master_idx; 2510 int dev_idx; 2511 }; 2512 2513 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2514 struct netlink_callback *cb, 2515 struct neigh_dump_filter *filter) 2516 { 2517 struct net *net = sock_net(skb->sk); 2518 struct neighbour *n; 2519 int rc, h, s_h = cb->args[1]; 2520 int idx, s_idx = idx = cb->args[2]; 2521 struct neigh_hash_table *nht; 2522 unsigned int flags = NLM_F_MULTI; 2523 2524 if (filter->dev_idx || filter->master_idx) 2525 flags |= NLM_F_DUMP_FILTERED; 2526 2527 rcu_read_lock_bh(); 2528 nht = rcu_dereference_bh(tbl->nht); 2529 2530 for (h = s_h; h < (1 << nht->hash_shift); h++) { 2531 if (h > s_h) 2532 s_idx = 0; 2533 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; 2534 n != NULL; 2535 n = rcu_dereference_bh(n->next)) { 2536 if (idx < s_idx || !net_eq(dev_net(n->dev), net)) 2537 goto next; 2538 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2539 neigh_master_filtered(n->dev, filter->master_idx)) 2540 goto next; 2541 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2542 cb->nlh->nlmsg_seq, 2543 RTM_NEWNEIGH, 2544 flags) < 0) { 2545 rc = -1; 2546 goto out; 2547 } 2548 next: 2549 idx++; 2550 } 2551 } 2552 rc = skb->len; 2553 out: 2554 rcu_read_unlock_bh(); 2555 cb->args[1] = h; 2556 cb->args[2] = idx; 2557 return rc; 2558 } 2559 2560 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2561 struct netlink_callback *cb, 2562 struct neigh_dump_filter *filter) 2563 { 2564 struct pneigh_entry *n; 2565 struct net *net = sock_net(skb->sk); 2566 int rc, h, s_h = cb->args[3]; 2567 int idx, s_idx = idx = cb->args[4]; 2568 unsigned int flags = NLM_F_MULTI; 2569 2570 if (filter->dev_idx || filter->master_idx) 2571 flags |= NLM_F_DUMP_FILTERED; 2572 2573 read_lock_bh(&tbl->lock); 2574 2575 for (h = s_h; h <= PNEIGH_HASHMASK; h++) { 2576 if (h > s_h) 2577 s_idx = 0; 2578 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { 2579 if (idx < s_idx || pneigh_net(n) != net) 2580 goto next; 2581 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2582 neigh_master_filtered(n->dev, filter->master_idx)) 2583 goto next; 2584 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2585 cb->nlh->nlmsg_seq, 2586 RTM_NEWNEIGH, flags, tbl) < 0) { 2587 read_unlock_bh(&tbl->lock); 2588 rc = -1; 2589 goto out; 2590 } 2591 next: 2592 idx++; 2593 } 2594 } 2595 2596 read_unlock_bh(&tbl->lock); 2597 rc = skb->len; 2598 out: 2599 cb->args[3] = h; 2600 cb->args[4] = idx; 2601 return rc; 2602 2603 } 2604 2605 static int neigh_valid_dump_req(const struct nlmsghdr *nlh, 2606 bool strict_check, 2607 struct neigh_dump_filter *filter, 2608 struct netlink_ext_ack *extack) 2609 { 2610 struct nlattr *tb[NDA_MAX + 1]; 2611 int err, i; 2612 2613 if (strict_check) { 2614 struct ndmsg *ndm; 2615 2616 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2617 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); 2618 return -EINVAL; 2619 } 2620 2621 ndm = nlmsg_data(nlh); 2622 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || 2623 ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { 2624 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); 2625 return -EINVAL; 2626 } 2627 2628 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, 2629 NULL, extack); 2630 } else { 2631 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, 2632 NULL, extack); 2633 } 2634 if (err < 0) 2635 return err; 2636 2637 for (i = 0; i <= NDA_MAX; ++i) { 2638 if (!tb[i]) 2639 continue; 2640 2641 /* all new attributes should require strict_check */ 2642 switch (i) { 2643 case NDA_IFINDEX: 2644 if (nla_len(tb[i]) != sizeof(u32)) { 2645 NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request"); 2646 return -EINVAL; 2647 } 2648 filter->dev_idx = nla_get_u32(tb[i]); 2649 break; 2650 case NDA_MASTER: 2651 if (nla_len(tb[i]) != sizeof(u32)) { 2652 NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request"); 2653 return -EINVAL; 2654 } 2655 filter->master_idx = nla_get_u32(tb[i]); 2656 break; 2657 default: 2658 if (strict_check) { 2659 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request"); 2660 return -EINVAL; 2661 } 2662 } 2663 } 2664 2665 return 0; 2666 } 2667 2668 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2669 { 2670 const struct nlmsghdr *nlh = cb->nlh; 2671 struct neigh_dump_filter filter = {}; 2672 struct neigh_table *tbl; 2673 int t, family, s_t; 2674 int proxy = 0; 2675 int err; 2676 2677 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2678 2679 /* check for full ndmsg structure presence, family member is 2680 * the same for both structures 2681 */ 2682 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && 2683 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) 2684 proxy = 1; 2685 2686 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); 2687 if (err < 0 && cb->strict_check) 2688 return err; 2689 2690 s_t = cb->args[0]; 2691 2692 for (t = 0; t < NEIGH_NR_TABLES; t++) { 2693 tbl = neigh_tables[t]; 2694 2695 if (!tbl) 2696 continue; 2697 if (t < s_t || (family && tbl->family != family)) 2698 continue; 2699 if (t > s_t) 2700 memset(&cb->args[1], 0, sizeof(cb->args) - 2701 sizeof(cb->args[0])); 2702 if (proxy) 2703 err = pneigh_dump_table(tbl, skb, cb, &filter); 2704 else 2705 err = neigh_dump_table(tbl, skb, cb, &filter); 2706 if (err < 0) 2707 break; 2708 } 2709 2710 cb->args[0] = t; 2711 return skb->len; 2712 } 2713 2714 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2715 { 2716 int chain; 2717 struct neigh_hash_table *nht; 2718 2719 rcu_read_lock_bh(); 2720 nht = rcu_dereference_bh(tbl->nht); 2721 2722 read_lock(&tbl->lock); /* avoid resizes */ 2723 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2724 struct neighbour *n; 2725 2726 for (n = rcu_dereference_bh(nht->hash_buckets[chain]); 2727 n != NULL; 2728 n = rcu_dereference_bh(n->next)) 2729 cb(n, cookie); 2730 } 2731 read_unlock(&tbl->lock); 2732 rcu_read_unlock_bh(); 2733 } 2734 EXPORT_SYMBOL(neigh_for_each); 2735 2736 /* The tbl->lock must be held as a writer and BH disabled. */ 2737 void __neigh_for_each_release(struct neigh_table *tbl, 2738 int (*cb)(struct neighbour *)) 2739 { 2740 int chain; 2741 struct neigh_hash_table *nht; 2742 2743 nht = rcu_dereference_protected(tbl->nht, 2744 lockdep_is_held(&tbl->lock)); 2745 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2746 struct neighbour *n; 2747 struct neighbour __rcu **np; 2748 2749 np = &nht->hash_buckets[chain]; 2750 while ((n = rcu_dereference_protected(*np, 2751 lockdep_is_held(&tbl->lock))) != NULL) { 2752 int release; 2753 2754 write_lock(&n->lock); 2755 release = cb(n); 2756 if (release) { 2757 rcu_assign_pointer(*np, 2758 rcu_dereference_protected(n->next, 2759 lockdep_is_held(&tbl->lock))); 2760 neigh_mark_dead(n); 2761 } else 2762 np = &n->next; 2763 write_unlock(&n->lock); 2764 if (release) 2765 neigh_cleanup_and_release(n); 2766 } 2767 } 2768 } 2769 EXPORT_SYMBOL(__neigh_for_each_release); 2770 2771 int neigh_xmit(int index, struct net_device *dev, 2772 const void *addr, struct sk_buff *skb) 2773 { 2774 int err = -EAFNOSUPPORT; 2775 if (likely(index < NEIGH_NR_TABLES)) { 2776 struct neigh_table *tbl; 2777 struct neighbour *neigh; 2778 2779 tbl = neigh_tables[index]; 2780 if (!tbl) 2781 goto out; 2782 rcu_read_lock_bh(); 2783 neigh = __neigh_lookup_noref(tbl, addr, dev); 2784 if (!neigh) 2785 neigh = __neigh_create(tbl, addr, dev, false); 2786 err = PTR_ERR(neigh); 2787 if (IS_ERR(neigh)) { 2788 rcu_read_unlock_bh(); 2789 goto out_kfree_skb; 2790 } 2791 err = neigh->output(neigh, skb); 2792 rcu_read_unlock_bh(); 2793 } 2794 else if (index == NEIGH_LINK_TABLE) { 2795 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 2796 addr, NULL, skb->len); 2797 if (err < 0) 2798 goto out_kfree_skb; 2799 err = dev_queue_xmit(skb); 2800 } 2801 out: 2802 return err; 2803 out_kfree_skb: 2804 kfree_skb(skb); 2805 goto out; 2806 } 2807 EXPORT_SYMBOL(neigh_xmit); 2808 2809 #ifdef CONFIG_PROC_FS 2810 2811 static struct neighbour *neigh_get_first(struct seq_file *seq) 2812 { 2813 struct neigh_seq_state *state = seq->private; 2814 struct net *net = seq_file_net(seq); 2815 struct neigh_hash_table *nht = state->nht; 2816 struct neighbour *n = NULL; 2817 int bucket = state->bucket; 2818 2819 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 2820 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { 2821 n = rcu_dereference_bh(nht->hash_buckets[bucket]); 2822 2823 while (n) { 2824 if (!net_eq(dev_net(n->dev), net)) 2825 goto next; 2826 if (state->neigh_sub_iter) { 2827 loff_t fakep = 0; 2828 void *v; 2829 2830 v = state->neigh_sub_iter(state, n, &fakep); 2831 if (!v) 2832 goto next; 2833 } 2834 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 2835 break; 2836 if (n->nud_state & ~NUD_NOARP) 2837 break; 2838 next: 2839 n = rcu_dereference_bh(n->next); 2840 } 2841 2842 if (n) 2843 break; 2844 } 2845 state->bucket = bucket; 2846 2847 return n; 2848 } 2849 2850 static struct neighbour *neigh_get_next(struct seq_file *seq, 2851 struct neighbour *n, 2852 loff_t *pos) 2853 { 2854 struct neigh_seq_state *state = seq->private; 2855 struct net *net = seq_file_net(seq); 2856 struct neigh_hash_table *nht = state->nht; 2857 2858 if (state->neigh_sub_iter) { 2859 void *v = state->neigh_sub_iter(state, n, pos); 2860 if (v) 2861 return n; 2862 } 2863 n = rcu_dereference_bh(n->next); 2864 2865 while (1) { 2866 while (n) { 2867 if (!net_eq(dev_net(n->dev), net)) 2868 goto next; 2869 if (state->neigh_sub_iter) { 2870 void *v = state->neigh_sub_iter(state, n, pos); 2871 if (v) 2872 return n; 2873 goto next; 2874 } 2875 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 2876 break; 2877 2878 if (n->nud_state & ~NUD_NOARP) 2879 break; 2880 next: 2881 n = rcu_dereference_bh(n->next); 2882 } 2883 2884 if (n) 2885 break; 2886 2887 if (++state->bucket >= (1 << nht->hash_shift)) 2888 break; 2889 2890 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); 2891 } 2892 2893 if (n && pos) 2894 --(*pos); 2895 return n; 2896 } 2897 2898 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) 2899 { 2900 struct neighbour *n = neigh_get_first(seq); 2901 2902 if (n) { 2903 --(*pos); 2904 while (*pos) { 2905 n = neigh_get_next(seq, n, pos); 2906 if (!n) 2907 break; 2908 } 2909 } 2910 return *pos ? NULL : n; 2911 } 2912 2913 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 2914 { 2915 struct neigh_seq_state *state = seq->private; 2916 struct net *net = seq_file_net(seq); 2917 struct neigh_table *tbl = state->tbl; 2918 struct pneigh_entry *pn = NULL; 2919 int bucket = state->bucket; 2920 2921 state->flags |= NEIGH_SEQ_IS_PNEIGH; 2922 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 2923 pn = tbl->phash_buckets[bucket]; 2924 while (pn && !net_eq(pneigh_net(pn), net)) 2925 pn = pn->next; 2926 if (pn) 2927 break; 2928 } 2929 state->bucket = bucket; 2930 2931 return pn; 2932 } 2933 2934 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, 2935 struct pneigh_entry *pn, 2936 loff_t *pos) 2937 { 2938 struct neigh_seq_state *state = seq->private; 2939 struct net *net = seq_file_net(seq); 2940 struct neigh_table *tbl = state->tbl; 2941 2942 do { 2943 pn = pn->next; 2944 } while (pn && !net_eq(pneigh_net(pn), net)); 2945 2946 while (!pn) { 2947 if (++state->bucket > PNEIGH_HASHMASK) 2948 break; 2949 pn = tbl->phash_buckets[state->bucket]; 2950 while (pn && !net_eq(pneigh_net(pn), net)) 2951 pn = pn->next; 2952 if (pn) 2953 break; 2954 } 2955 2956 if (pn && pos) 2957 --(*pos); 2958 2959 return pn; 2960 } 2961 2962 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) 2963 { 2964 struct pneigh_entry *pn = pneigh_get_first(seq); 2965 2966 if (pn) { 2967 --(*pos); 2968 while (*pos) { 2969 pn = pneigh_get_next(seq, pn, pos); 2970 if (!pn) 2971 break; 2972 } 2973 } 2974 return *pos ? NULL : pn; 2975 } 2976 2977 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) 2978 { 2979 struct neigh_seq_state *state = seq->private; 2980 void *rc; 2981 loff_t idxpos = *pos; 2982 2983 rc = neigh_get_idx(seq, &idxpos); 2984 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 2985 rc = pneigh_get_idx(seq, &idxpos); 2986 2987 return rc; 2988 } 2989 2990 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 2991 __acquires(rcu_bh) 2992 { 2993 struct neigh_seq_state *state = seq->private; 2994 2995 state->tbl = tbl; 2996 state->bucket = 0; 2997 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 2998 2999 rcu_read_lock_bh(); 3000 state->nht = rcu_dereference_bh(tbl->nht); 3001 3002 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 3003 } 3004 EXPORT_SYMBOL(neigh_seq_start); 3005 3006 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3007 { 3008 struct neigh_seq_state *state; 3009 void *rc; 3010 3011 if (v == SEQ_START_TOKEN) { 3012 rc = neigh_get_first(seq); 3013 goto out; 3014 } 3015 3016 state = seq->private; 3017 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { 3018 rc = neigh_get_next(seq, v, NULL); 3019 if (rc) 3020 goto out; 3021 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3022 rc = pneigh_get_first(seq); 3023 } else { 3024 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); 3025 rc = pneigh_get_next(seq, v, NULL); 3026 } 3027 out: 3028 ++(*pos); 3029 return rc; 3030 } 3031 EXPORT_SYMBOL(neigh_seq_next); 3032 3033 void neigh_seq_stop(struct seq_file *seq, void *v) 3034 __releases(rcu_bh) 3035 { 3036 rcu_read_unlock_bh(); 3037 } 3038 EXPORT_SYMBOL(neigh_seq_stop); 3039 3040 /* statistics via seq_file */ 3041 3042 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 3043 { 3044 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3045 int cpu; 3046 3047 if (*pos == 0) 3048 return SEQ_START_TOKEN; 3049 3050 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 3051 if (!cpu_possible(cpu)) 3052 continue; 3053 *pos = cpu+1; 3054 return per_cpu_ptr(tbl->stats, cpu); 3055 } 3056 return NULL; 3057 } 3058 3059 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3060 { 3061 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3062 int cpu; 3063 3064 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 3065 if (!cpu_possible(cpu)) 3066 continue; 3067 *pos = cpu+1; 3068 return per_cpu_ptr(tbl->stats, cpu); 3069 } 3070 return NULL; 3071 } 3072 3073 static void neigh_stat_seq_stop(struct seq_file *seq, void *v) 3074 { 3075 3076 } 3077 3078 static int neigh_stat_seq_show(struct seq_file *seq, void *v) 3079 { 3080 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3081 struct neigh_statistics *st = v; 3082 3083 if (v == SEQ_START_TOKEN) { 3084 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); 3085 return 0; 3086 } 3087 3088 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " 3089 "%08lx %08lx %08lx %08lx %08lx %08lx\n", 3090 atomic_read(&tbl->entries), 3091 3092 st->allocs, 3093 st->destroys, 3094 st->hash_grows, 3095 3096 st->lookups, 3097 st->hits, 3098 3099 st->res_failed, 3100 3101 st->rcv_probes_mcast, 3102 st->rcv_probes_ucast, 3103 3104 st->periodic_gc_runs, 3105 st->forced_gc_runs, 3106 st->unres_discards, 3107 st->table_fulls 3108 ); 3109 3110 return 0; 3111 } 3112 3113 static const struct seq_operations neigh_stat_seq_ops = { 3114 .start = neigh_stat_seq_start, 3115 .next = neigh_stat_seq_next, 3116 .stop = neigh_stat_seq_stop, 3117 .show = neigh_stat_seq_show, 3118 }; 3119 #endif /* CONFIG_PROC_FS */ 3120 3121 static inline size_t neigh_nlmsg_size(void) 3122 { 3123 return NLMSG_ALIGN(sizeof(struct ndmsg)) 3124 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 3125 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ 3126 + nla_total_size(sizeof(struct nda_cacheinfo)) 3127 + nla_total_size(4) /* NDA_PROBES */ 3128 + nla_total_size(1); /* NDA_PROTOCOL */ 3129 } 3130 3131 static void __neigh_notify(struct neighbour *n, int type, int flags, 3132 u32 pid) 3133 { 3134 struct net *net = dev_net(n->dev); 3135 struct sk_buff *skb; 3136 int err = -ENOBUFS; 3137 3138 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); 3139 if (skb == NULL) 3140 goto errout; 3141 3142 err = neigh_fill_info(skb, n, pid, 0, type, flags); 3143 if (err < 0) { 3144 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ 3145 WARN_ON(err == -EMSGSIZE); 3146 kfree_skb(skb); 3147 goto errout; 3148 } 3149 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 3150 return; 3151 errout: 3152 if (err < 0) 3153 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 3154 } 3155 3156 void neigh_app_ns(struct neighbour *n) 3157 { 3158 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); 3159 } 3160 EXPORT_SYMBOL(neigh_app_ns); 3161 3162 #ifdef CONFIG_SYSCTL 3163 static int zero; 3164 static int int_max = INT_MAX; 3165 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); 3166 3167 static int proc_unres_qlen(struct ctl_table *ctl, int write, 3168 void __user *buffer, size_t *lenp, loff_t *ppos) 3169 { 3170 int size, ret; 3171 struct ctl_table tmp = *ctl; 3172 3173 tmp.extra1 = &zero; 3174 tmp.extra2 = &unres_qlen_max; 3175 tmp.data = &size; 3176 3177 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); 3178 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3179 3180 if (write && !ret) 3181 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); 3182 return ret; 3183 } 3184 3185 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, 3186 int family) 3187 { 3188 switch (family) { 3189 case AF_INET: 3190 return __in_dev_arp_parms_get_rcu(dev); 3191 case AF_INET6: 3192 return __in6_dev_nd_parms_get_rcu(dev); 3193 } 3194 return NULL; 3195 } 3196 3197 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, 3198 int index) 3199 { 3200 struct net_device *dev; 3201 int family = neigh_parms_family(p); 3202 3203 rcu_read_lock(); 3204 for_each_netdev_rcu(net, dev) { 3205 struct neigh_parms *dst_p = 3206 neigh_get_dev_parms_rcu(dev, family); 3207 3208 if (dst_p && !test_bit(index, dst_p->data_state)) 3209 dst_p->data[index] = p->data[index]; 3210 } 3211 rcu_read_unlock(); 3212 } 3213 3214 static void neigh_proc_update(struct ctl_table *ctl, int write) 3215 { 3216 struct net_device *dev = ctl->extra1; 3217 struct neigh_parms *p = ctl->extra2; 3218 struct net *net = neigh_parms_net(p); 3219 int index = (int *) ctl->data - p->data; 3220 3221 if (!write) 3222 return; 3223 3224 set_bit(index, p->data_state); 3225 if (index == NEIGH_VAR_DELAY_PROBE_TIME) 3226 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 3227 if (!dev) /* NULL dev means this is default value */ 3228 neigh_copy_dflt_parms(net, p, index); 3229 } 3230 3231 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, 3232 void __user *buffer, 3233 size_t *lenp, loff_t *ppos) 3234 { 3235 struct ctl_table tmp = *ctl; 3236 int ret; 3237 3238 tmp.extra1 = &zero; 3239 tmp.extra2 = &int_max; 3240 3241 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3242 neigh_proc_update(ctl, write); 3243 return ret; 3244 } 3245 3246 int neigh_proc_dointvec(struct ctl_table *ctl, int write, 3247 void __user *buffer, size_t *lenp, loff_t *ppos) 3248 { 3249 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 3250 3251 neigh_proc_update(ctl, write); 3252 return ret; 3253 } 3254 EXPORT_SYMBOL(neigh_proc_dointvec); 3255 3256 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, 3257 void __user *buffer, 3258 size_t *lenp, loff_t *ppos) 3259 { 3260 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3261 3262 neigh_proc_update(ctl, write); 3263 return ret; 3264 } 3265 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); 3266 3267 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, 3268 void __user *buffer, 3269 size_t *lenp, loff_t *ppos) 3270 { 3271 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); 3272 3273 neigh_proc_update(ctl, write); 3274 return ret; 3275 } 3276 3277 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, 3278 void __user *buffer, 3279 size_t *lenp, loff_t *ppos) 3280 { 3281 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3282 3283 neigh_proc_update(ctl, write); 3284 return ret; 3285 } 3286 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); 3287 3288 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, 3289 void __user *buffer, 3290 size_t *lenp, loff_t *ppos) 3291 { 3292 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); 3293 3294 neigh_proc_update(ctl, write); 3295 return ret; 3296 } 3297 3298 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, 3299 void __user *buffer, 3300 size_t *lenp, loff_t *ppos) 3301 { 3302 struct neigh_parms *p = ctl->extra2; 3303 int ret; 3304 3305 if (strcmp(ctl->procname, "base_reachable_time") == 0) 3306 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3307 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) 3308 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3309 else 3310 ret = -1; 3311 3312 if (write && ret == 0) { 3313 /* update reachable_time as well, otherwise, the change will 3314 * only be effective after the next time neigh_periodic_work 3315 * decides to recompute it 3316 */ 3317 p->reachable_time = 3318 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 3319 } 3320 return ret; 3321 } 3322 3323 #define NEIGH_PARMS_DATA_OFFSET(index) \ 3324 (&((struct neigh_parms *) 0)->data[index]) 3325 3326 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ 3327 [NEIGH_VAR_ ## attr] = { \ 3328 .procname = name, \ 3329 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ 3330 .maxlen = sizeof(int), \ 3331 .mode = mval, \ 3332 .proc_handler = proc, \ 3333 } 3334 3335 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ 3336 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) 3337 3338 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ 3339 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) 3340 3341 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ 3342 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) 3343 3344 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ 3345 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 3346 3347 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ 3348 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 3349 3350 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ 3351 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) 3352 3353 static struct neigh_sysctl_table { 3354 struct ctl_table_header *sysctl_header; 3355 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; 3356 } neigh_sysctl_template __read_mostly = { 3357 .neigh_vars = { 3358 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), 3359 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), 3360 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), 3361 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), 3362 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), 3363 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), 3364 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), 3365 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), 3366 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), 3367 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), 3368 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), 3369 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), 3370 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), 3371 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), 3372 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), 3373 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), 3374 [NEIGH_VAR_GC_INTERVAL] = { 3375 .procname = "gc_interval", 3376 .maxlen = sizeof(int), 3377 .mode = 0644, 3378 .proc_handler = proc_dointvec_jiffies, 3379 }, 3380 [NEIGH_VAR_GC_THRESH1] = { 3381 .procname = "gc_thresh1", 3382 .maxlen = sizeof(int), 3383 .mode = 0644, 3384 .extra1 = &zero, 3385 .extra2 = &int_max, 3386 .proc_handler = proc_dointvec_minmax, 3387 }, 3388 [NEIGH_VAR_GC_THRESH2] = { 3389 .procname = "gc_thresh2", 3390 .maxlen = sizeof(int), 3391 .mode = 0644, 3392 .extra1 = &zero, 3393 .extra2 = &int_max, 3394 .proc_handler = proc_dointvec_minmax, 3395 }, 3396 [NEIGH_VAR_GC_THRESH3] = { 3397 .procname = "gc_thresh3", 3398 .maxlen = sizeof(int), 3399 .mode = 0644, 3400 .extra1 = &zero, 3401 .extra2 = &int_max, 3402 .proc_handler = proc_dointvec_minmax, 3403 }, 3404 {}, 3405 }, 3406 }; 3407 3408 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3409 proc_handler *handler) 3410 { 3411 int i; 3412 struct neigh_sysctl_table *t; 3413 const char *dev_name_source; 3414 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3415 char *p_name; 3416 3417 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3418 if (!t) 3419 goto err; 3420 3421 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { 3422 t->neigh_vars[i].data += (long) p; 3423 t->neigh_vars[i].extra1 = dev; 3424 t->neigh_vars[i].extra2 = p; 3425 } 3426 3427 if (dev) { 3428 dev_name_source = dev->name; 3429 /* Terminate the table early */ 3430 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, 3431 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); 3432 } else { 3433 struct neigh_table *tbl = p->tbl; 3434 dev_name_source = "default"; 3435 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; 3436 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; 3437 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; 3438 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; 3439 } 3440 3441 if (handler) { 3442 /* RetransTime */ 3443 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3444 /* ReachableTime */ 3445 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3446 /* RetransTime (in milliseconds)*/ 3447 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3448 /* ReachableTime (in milliseconds) */ 3449 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3450 } else { 3451 /* Those handlers will update p->reachable_time after 3452 * base_reachable_time(_ms) is set to ensure the new timer starts being 3453 * applied after the next neighbour update instead of waiting for 3454 * neigh_periodic_work to update its value (can be multiple minutes) 3455 * So any handler that replaces them should do this as well 3456 */ 3457 /* ReachableTime */ 3458 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = 3459 neigh_proc_base_reachable_time; 3460 /* ReachableTime (in milliseconds) */ 3461 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = 3462 neigh_proc_base_reachable_time; 3463 } 3464 3465 /* Don't export sysctls to unprivileged users */ 3466 if (neigh_parms_net(p)->user_ns != &init_user_ns) 3467 t->neigh_vars[0].procname = NULL; 3468 3469 switch (neigh_parms_family(p)) { 3470 case AF_INET: 3471 p_name = "ipv4"; 3472 break; 3473 case AF_INET6: 3474 p_name = "ipv6"; 3475 break; 3476 default: 3477 BUG(); 3478 } 3479 3480 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", 3481 p_name, dev_name_source); 3482 t->sysctl_header = 3483 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); 3484 if (!t->sysctl_header) 3485 goto free; 3486 3487 p->sysctl_table = t; 3488 return 0; 3489 3490 free: 3491 kfree(t); 3492 err: 3493 return -ENOBUFS; 3494 } 3495 EXPORT_SYMBOL(neigh_sysctl_register); 3496 3497 void neigh_sysctl_unregister(struct neigh_parms *p) 3498 { 3499 if (p->sysctl_table) { 3500 struct neigh_sysctl_table *t = p->sysctl_table; 3501 p->sysctl_table = NULL; 3502 unregister_net_sysctl_table(t->sysctl_header); 3503 kfree(t); 3504 } 3505 } 3506 EXPORT_SYMBOL(neigh_sysctl_unregister); 3507 3508 #endif /* CONFIG_SYSCTL */ 3509 3510 static int __init neigh_init(void) 3511 { 3512 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); 3513 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); 3514 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0); 3515 3516 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 3517 0); 3518 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); 3519 3520 return 0; 3521 } 3522 3523 subsys_initcall(neigh_init); 3524