/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with the rwlock tbl->lock.

   - All scans/updates of hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the
     network.  Doing so results in deadlocks if the backend/driver
     in turn needs to use the neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state.  The same lock is also used to protect other entry
   fields:
   - the timer
   - the resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simplistic and to make no
   callbacks back into the neighbour tables.
 */

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * Returns a value randomly distributed in the interval
 * (1/2)*base ... (3/2)*base.  This corresponds to the default IPv6
 * settings and is not overridable, because it is a really reasonable
 * choice.
 */
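/* Example: with the default base_reachable_time of 30 seconds, each
 * recomputation draws reachable_time uniformly from [15 s, 45 s).
 */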
unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);

static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}
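/* Forced GC scans tbl->gc_list from the front (new entries are added at
 * the tail, so the oldest candidates come first) and drops entries that
 * are unreferenced and either NUD_FAILED or not updated for 5 seconds,
 * until the table is back below gc_thresh2.
 */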
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
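/* Flushing a device: entries are unlinked from the hash and marked dead.
 * Entries that are still referenced are neutered rather than freed: their
 * timers are stopped, queued skbs are dropped, and output is redirected
 * to neigh_blackhole until the last user releases them.
 */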
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
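/* Readers traverse the bucket array under rcu_read_lock_bh(), so an old
 * array replaced by neigh_hash_grow() must stay valid until all such
 * readers have finished; hence the free is deferred via call_rcu().
 */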
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
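/* Creation path: allocate, run the protocol constructor and any device
 * hooks, then insert under tbl->lock, growing the hash once the entry
 * count exceeds the bucket count.  If a concurrent creator already
 * inserted an equivalent entry, that entry wins and the new allocation
 * is released.
 */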
static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
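/* Proxy (pneigh) entries live in a small separate 16-bucket hash keyed
 * on the last four bytes of the address; creating one (creat != 0)
 * requires the RTNL lock, while plain lookups only take tbl->lock.
 */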
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	n->protocol = 0;
	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table;
 *	this runs when the last reference is dropped.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
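/* Fast-path switching: neigh->output is swapped between
 * ops->connected_output (link-layer address trusted, header can be
 * prebuilt) and ops->output (revalidate/resolve first).
 */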
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
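/* With the usual defaults (ucast_solicit=3, app_solicit=0,
 * mcast_solicit=3), neigh_max_probes() allows six transmissions before
 * an entry in NUD_INCOMPLETE is failed; in NUD_PROBE, mcast_resolicit
 * (default 0) replaces the multicast component.
 */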
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very thin place.  report_unreachable is a very
	   complicated routine; in particular, it can hit the same
	   neighbour entry!

	   So we try to be accurate and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);
}
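/* Output-path state machine (RFC 4861 style): the first transmit moves a
 * fresh entry to NUD_INCOMPLETE and starts solicitations; traffic to a
 * NUD_STALE entry moves it to NUD_DELAY, and the timer above escalates
 * DELAY -> PROBE -> FAILED if no confirmation arrives in time.
 */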
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}



/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
		if the new one differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
		lladdr instead of overriding it
		if they differ.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
		NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
		to be a router.

   Caller MUST hold a reference count on the entry.
 */
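/* For example, an administrative replacement of an entry's link-layer
 * address from user space reaches this routine roughly as (sketch,
 * hypothetical values):
 *
 *	neigh_update(n, new_ha, NUD_PERMANENT,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, pid);
 */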
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		goto out;
	}

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one. */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * received an ARP packet, even if it doesn't change the IP-to-MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new state is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry.  Otherwise we risk moving the locktime
	 * window with noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;
	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is
			 * that things such as shaper, eql, and sch_teql
			 * can end up using alternative, different, neigh
			 * objects to output the packet in the output path.
			 * So what we need to do here is re-lookup the
			 * top-level neigh in the path so we can reinject
			 * the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
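/* The hh_cache holds a fully prebuilt link-layer header so that the
 * connected fast path can copy it in front of each packet instead of
 * calling dev_hard_header() every time; hh_len != 0 marks it valid, and
 * neigh_update_hhs() rewrites it when the lladdr changes.
 */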
/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
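/* Per-device parameter blocks: each struct neigh_parms is refcounted and
 * freed via RCU, so packet-path readers can dereference them without
 * taking tbl->lock; the entry with no device is the per-table default.
 */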
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
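/* Table setup: one neigh_table per address family (ARP, ND, DECnet),
 * starting with a shift-3 (8 bucket) hash that neigh_hash_grow() doubles
 * as entries accumulate; per-table stats are per-CPU and exposed under
 * /proc/net/stat.
 */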
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
};
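/* Netlink handlers: RTM_DELNEIGH below is implemented as an administrative
 * update to NUD_FAILED followed by unlinking the entry; RTM_NEWNEIGH
 * either creates an entry or updates one in place, honouring NLM_F_CREATE,
 * NLM_F_EXCL and NLM_F_REPLACE.
 */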
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}
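/* Table dump helpers: parameters are emitted as a nested NDTA_PARMS
 * attribute.  On overrun the nest is cancelled and -EMSGSIZE returned,
 * so the netlink core can retry the dump with a larger buffer.
 */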
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics *st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs += st->allocs;
			ndst.ndts_destroys += st->destroys;
			ndst.ndts_hash_grows += st->hash_grows;
			ndst.ndts_res_failed += st->res_failed;
			ndst.ndts_lookups += st->lookups;
			ndst.ndts_hits += st->hits;
			ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs += st->forced_gc_runs;
			ndst.ndts_table_fulls += st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

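/* RTM_SETNEIGHTBL handler: looks the table up by NDTA_NAME (optionally
 * narrowed by family) and updates per-table and per-parms tunables.
 */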
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}

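/* Strict validation of an RTM_GETNEIGHTBL dump request: the header must
 * be exactly a struct ndtmsg with zeroed padding and no trailing
 * attributes.
 */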
static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

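/* Fill one RTM_NEWNEIGH message for a neighbour entry.  The entry's
 * lock is only taken around the state/address snapshot, so the link
 * layer address and NUD state stay consistent with each other.
 */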
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = neigh->ops->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = neigh->flags;
	ndm->ndm_type = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = tbl->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = pn->flags | NTF_PROXY;
	ndm->ndm_type = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}

static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};

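/* Walk one table's hash buckets under RCU and emit an RTM_NEWNEIGH
 * message per matching entry.  cb->args[1]/[2] record the bucket and
 * index reached, so an interrupted dump can resume where it left off.
 */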
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH, flags, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int err, i;

	if (strict_check) {
		struct ndmsg *ndm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
					 nda_policy, extack);
	} else {
		err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
				  nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}

static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
				 nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -EINVAL;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

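/* Transmit skb to addr through dev, resolving via the neighbour table
 * selected by index.  A hedged usage sketch: an L3 protocol might call
 * neigh_xmit(NEIGH_ARP_TABLE, dev, &ip4_daddr, skb), where ip4_daddr is
 * a hypothetical IPv4 destination; NEIGH_LINK_TABLE instead bypasses
 * resolution and builds the hard header directly.
 */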
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	} else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);

		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);

				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

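/* Shared seq_file iterator for the per-table /proc entries: neighbour
 * entries are walked first, then (unless NEIGH_SEQ_NEIGH_ONLY) the
 * proxy entries, with *pos spanning both ranges.
 */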
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
#endif /* CONFIG_PROC_FS */

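/* Broadcast a neighbour event to RTNLGRP_NEIGH listeners; on allocation
 * or fill failure the error is recorded against the group instead.
 */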
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int zero;
static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

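/* unres_qlen is exposed to userspace in packets but stored internally
 * in bytes: reads divide and writes multiply by the truesize of a full
 * ethernet frame.
 */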
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = &zero;
	tmp.extra2 = &int_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

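/* The template below stores, for each tunable, its offset into
 * neigh_parms->data (computed from a NULL base pointer); the offset is
 * rebased onto the real parms in neigh_sysctl_register() by adding p.
 */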
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};

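/* Register the per-device (or per-family default) neighbour sysctls,
 * e.g. net/ipv4/neigh/eth0/retrans_time for a hypothetical "eth0" ARP
 * parms set, or net/ipv6/neigh/default/... for the IPv6 defaults.
 */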
"unres_qlen_bytes"), 3543 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), 3544 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), 3545 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), 3546 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), 3547 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), 3548 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), 3549 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), 3550 [NEIGH_VAR_GC_INTERVAL] = { 3551 .procname = "gc_interval", 3552 .maxlen = sizeof(int), 3553 .mode = 0644, 3554 .proc_handler = proc_dointvec_jiffies, 3555 }, 3556 [NEIGH_VAR_GC_THRESH1] = { 3557 .procname = "gc_thresh1", 3558 .maxlen = sizeof(int), 3559 .mode = 0644, 3560 .extra1 = &zero, 3561 .extra2 = &int_max, 3562 .proc_handler = proc_dointvec_minmax, 3563 }, 3564 [NEIGH_VAR_GC_THRESH2] = { 3565 .procname = "gc_thresh2", 3566 .maxlen = sizeof(int), 3567 .mode = 0644, 3568 .extra1 = &zero, 3569 .extra2 = &int_max, 3570 .proc_handler = proc_dointvec_minmax, 3571 }, 3572 [NEIGH_VAR_GC_THRESH3] = { 3573 .procname = "gc_thresh3", 3574 .maxlen = sizeof(int), 3575 .mode = 0644, 3576 .extra1 = &zero, 3577 .extra2 = &int_max, 3578 .proc_handler = proc_dointvec_minmax, 3579 }, 3580 {}, 3581 }, 3582 }; 3583 3584 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3585 proc_handler *handler) 3586 { 3587 int i; 3588 struct neigh_sysctl_table *t; 3589 const char *dev_name_source; 3590 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3591 char *p_name; 3592 3593 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3594 if (!t) 3595 goto err; 3596 3597 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { 3598 t->neigh_vars[i].data += (long) p; 3599 t->neigh_vars[i].extra1 = dev; 3600 t->neigh_vars[i].extra2 = p; 3601 } 3602 3603 if (dev) { 3604 dev_name_source = dev->name; 3605 /* Terminate the table early */ 3606 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, 3607 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); 3608 } else { 3609 struct neigh_table *tbl = p->tbl; 3610 dev_name_source = "default"; 3611 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; 3612 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; 3613 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; 3614 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; 3615 } 3616 3617 if (handler) { 3618 /* RetransTime */ 3619 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3620 /* ReachableTime */ 3621 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3622 /* RetransTime (in milliseconds)*/ 3623 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3624 /* ReachableTime (in milliseconds) */ 3625 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3626 } else { 3627 /* Those handlers will update p->reachable_time after 3628 * base_reachable_time(_ms) is set to ensure the new timer starts being 3629 * applied after the next neighbour update instead of waiting for 3630 * neigh_periodic_work to update its value (can be multiple minutes) 3631 * So any handler that replaces them should do this as well 3632 */ 3633 /* ReachableTime */ 3634 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = 3635 neigh_proc_base_reachable_time; 3636 /* ReachableTime (in milliseconds) */ 3637 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = 
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);