/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans of and updates to the hash buckets MUST be made under
     this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the
     network. That would deadlock if the backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase its
     reference count and release the table lock.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock also protects other entry
   fields:
   - the timer
   - the resolution queue

   Again, nothing clever shall be done under neigh->lock; the most
   complicated procedure we allow there is dev->hard_header, which is
   assumed to be simplistic and not to call back into the neighbour
   tables.
 */
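/* Example (illustrative sketch of the rules above; find_entry() and
 * do_slow_work() are hypothetical helpers):
 *
 *	write_lock_bh(&tbl->lock);
 *	n = find_entry(tbl);		// scan buckets under the lock
 *	if (n)
 *		neigh_hold(n);		// pin the entry
 *	write_unlock_bh(&tbl->lock);	// drop the lock first ...
 *	if (n) {
 *		do_slow_work(n);	// ... then do callbacks/transmits
 *		neigh_release(n);
 *	}
 */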
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * Returns a value randomly distributed in the interval
 * (1/2)*base...(3/2)*base. It corresponds to the default IPv6 settings
 * and is not overridable, because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
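/* Example: with an IPv6-style BASE_REACHABLE_TIME of 30 seconds,
 *
 *	unsigned long t = neigh_rand_reach_time(30 * HZ);
 *
 * yields a value uniformly distributed in [15 * HZ, 45 * HZ), i.e. a
 * per-entry reachable period of 15..45 seconds. The jitter keeps hosts
 * on a link from re-validating their neighbours in lockstep.
 */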
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
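/* Allocation policy sketch (thresholds are per-table tunables; the
 * numbers below are just the usual defaults): with gc_thresh2 = 512
 * and gc_thresh3 = 1024, neigh_alloc() roughly does
 *
 *	entries = atomic_inc_return(&tbl->gc_entries) - 1;
 *	if (entries >= 1024 ||
 *	    (entries >= 512 && last forced flush older than 5s))
 *		if (forced gc reclaims nothing && entries >= 1024)
 *			fail;	// "neighbor table overflow!"
 *
 * Entries created as exempt_from_gc (e.g. NUD_PERMANENT ones) skip the
 * thresholds and the gc_entries accounting entirely.
 */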
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
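/* Bucket selection uses the top hash_shift bits of the 32-bit hash:
 *
 *	idx = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 *
 * so growing the table to shift + 1 simply consumes one more high bit.
 * hash_rnd is forced odd (get_random_u32() | 1), which suits the
 * multiplicative hash functions the protocols plug in here.
 */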
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
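/* Usage sketch (hypothetical caller): a successful lookup returns a
 * referenced entry which must be dropped with neigh_release():
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &next_hop, dev);
 *	if (n) {
 *		... inspect n->nud_state, n->ha (under n->lock) ...
 *		neigh_release(n);
 *	}
 */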
static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
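/* Usage sketch: __neigh_create() returns the already-existing entry if
 * another CPU won the race to insert the same key, otherwise the new
 * one; on failure it returns an ERR_PTR (-ENOBUFS from neigh_alloc(),
 * -EINVAL if the parms died under us):
 *
 *	struct neighbour *n = __neigh_create(&arp_tbl, &next_hop, dev, true);
 *	if (IS_ERR(n))
 *		return PTR_ERR(n);
 *	...
 *	neigh_release(n);	// drop the want_ref reference
 */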
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	n->protocol = 0;
	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
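/* Example: pneigh_hash() reads the last four bytes of the key (for
 * IPv4 that is the address itself), xor-folds them down and masks with
 * PNEIGH_HASHMASK, selecting one of 16 proxy hash chains. The proxy
 * table is small and RTNL-managed, so this cheap hash is sufficient.
 */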
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place: report_unreachable() is a
	   complicated routine and may even hit this same neighbour
	   entry again! So we try to be careful and avoid an endless
	   loop.						--ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
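/* Summary of the NUD transitions driven by the timer above:
 *
 *	REACHABLE -> DELAY	reachable_time expired but the entry
 *				was used within DELAY_PROBE_TIME
 *	REACHABLE -> STALE	reachable_time expired, entry idle
 *	DELAY     -> REACHABLE	confirmation arrived in time
 *	DELAY     -> PROBE	DELAY_PROBE_TIME expired unconfirmed
 *	PROBE/INCOMPLETE	re-solicit every RETRANS_TIME until
 *		  -> FAILED	neigh_max_probes() is exceeded
 */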
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
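/* Queueing example for the INCOMPLETE path above: if QUEUE_LEN_BYTES
 * is 4096 and arp_queue already holds two skbs of truesize 1500,
 * queueing a third 1500-byte skb first drops the oldest queued skb
 * (accounted in the unres_discards statistic) to stay within budget.
 */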
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold a reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		goto out;
	}

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
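/* Usage sketch: a protocol confirming a binding from a received reply
 * would do something like
 *
 *	neigh_update(n, sha, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_WEAK_OVERRIDE, 0);
 *
 * while netlink additionally passes NEIGH_UPDATE_F_ADMIN, which is what
 * permits changing NOARP and PERMANENT entries.
 */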
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
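/* The ha_lock read pattern above (also used by neigh_connected_output()
 * below) is the standard lockless seqlock retry loop: rebuild the
 * header if the link-layer address changed mid-copy, never blocking
 * the writer:
 *
 *	do {
 *		seq = read_seqbegin(&neigh->ha_lock);
 *		... copy neigh->ha into the frame ...
 *	} while (read_seqretry(&neigh->ha_lock, seq));
 */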
/* As fast as possible without hh cache */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}
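/* Userspace view: the RTM_NEWNEIGH handling above is what services,
 * e.g. (iproute2):
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 \
 *		dev eth0 nud permanent
 *
 * "replace" sets NLM_F_CREATE | NLM_F_REPLACE, and "nud permanent"
 * creates an entry exempt from garbage collection.
 */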
sizeof(ndst)); 2078 2079 for_each_possible_cpu(cpu) { 2080 struct neigh_statistics *st; 2081 2082 st = per_cpu_ptr(tbl->stats, cpu); 2083 ndst.ndts_allocs += st->allocs; 2084 ndst.ndts_destroys += st->destroys; 2085 ndst.ndts_hash_grows += st->hash_grows; 2086 ndst.ndts_res_failed += st->res_failed; 2087 ndst.ndts_lookups += st->lookups; 2088 ndst.ndts_hits += st->hits; 2089 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast; 2090 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; 2091 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; 2092 ndst.ndts_forced_gc_runs += st->forced_gc_runs; 2093 ndst.ndts_table_fulls += st->table_fulls; 2094 } 2095 2096 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, 2097 NDTA_PAD)) 2098 goto nla_put_failure; 2099 } 2100 2101 BUG_ON(tbl->parms.dev); 2102 if (neightbl_fill_parms(skb, &tbl->parms) < 0) 2103 goto nla_put_failure; 2104 2105 read_unlock_bh(&tbl->lock); 2106 nlmsg_end(skb, nlh); 2107 return 0; 2108 2109 nla_put_failure: 2110 read_unlock_bh(&tbl->lock); 2111 nlmsg_cancel(skb, nlh); 2112 return -EMSGSIZE; 2113 } 2114 2115 static int neightbl_fill_param_info(struct sk_buff *skb, 2116 struct neigh_table *tbl, 2117 struct neigh_parms *parms, 2118 u32 pid, u32 seq, int type, 2119 unsigned int flags) 2120 { 2121 struct ndtmsg *ndtmsg; 2122 struct nlmsghdr *nlh; 2123 2124 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); 2125 if (nlh == NULL) 2126 return -EMSGSIZE; 2127 2128 ndtmsg = nlmsg_data(nlh); 2129 2130 read_lock_bh(&tbl->lock); 2131 ndtmsg->ndtm_family = tbl->family; 2132 ndtmsg->ndtm_pad1 = 0; 2133 ndtmsg->ndtm_pad2 = 0; 2134 2135 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || 2136 neightbl_fill_parms(skb, parms) < 0) 2137 goto errout; 2138 2139 read_unlock_bh(&tbl->lock); 2140 nlmsg_end(skb, nlh); 2141 return 0; 2142 errout: 2143 read_unlock_bh(&tbl->lock); 2144 nlmsg_cancel(skb, nlh); 2145 return -EMSGSIZE; 2146 } 2147 2148 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { 2149 [NDTA_NAME] = { .type = NLA_STRING }, 2150 [NDTA_THRESH1] = { .type = NLA_U32 }, 2151 [NDTA_THRESH2] = { .type = NLA_U32 }, 2152 [NDTA_THRESH3] = { .type = NLA_U32 }, 2153 [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, 2154 [NDTA_PARMS] = { .type = NLA_NESTED }, 2155 }; 2156 2157 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { 2158 [NDTPA_IFINDEX] = { .type = NLA_U32 }, 2159 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, 2160 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, 2161 [NDTPA_APP_PROBES] = { .type = NLA_U32 }, 2162 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, 2163 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, 2164 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, 2165 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, 2166 [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, 2167 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, 2168 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, 2169 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, 2170 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, 2171 [NDTPA_LOCKTIME] = { .type = NLA_U64 }, 2172 }; 2173 2174 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, 2175 struct netlink_ext_ack *extack) 2176 { 2177 struct net *net = sock_net(skb->sk); 2178 struct neigh_table *tbl; 2179 struct ndtmsg *ndtmsg; 2180 struct nlattr *tb[NDTA_MAX+1]; 2181 bool found = false; 2182 int err, tidx; 2183 2184 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, 2185 nl_neightbl_policy, extack); 2186 if (err < 0) 2187 goto errout; 2188 2189 if (tb[NDTA_NAME] == NULL) { 2190 err = -EINVAL; 2191 goto errout; 2192 } 
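/*
 * Example (illustrative; the command below is an assumption about a
 * typical iproute2 invocation, not taken from this file): an
 * RTM_SETNEIGHTBL request reaches this point carrying NDTA_NAME and,
 * optionally, a nested NDTA_PARMS block. A shell command such as
 *
 *	ip ntable change name arp_cache dev eth0 queue 8
 *
 * would arrive here with tb[NDTA_NAME] set to "arp_cache" and the
 * nested block carrying NDTPA_IFINDEX and NDTPA_QUEUE_LEN, which the
 * switch below converts into QUEUE_LEN_BYTES via SKB_TRUESIZE().
 */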
2193 2194 ndtmsg = nlmsg_data(nlh); 2195 2196 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { 2197 tbl = neigh_tables[tidx]; 2198 if (!tbl) 2199 continue; 2200 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) 2201 continue; 2202 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { 2203 found = true; 2204 break; 2205 } 2206 } 2207 2208 if (!found) 2209 return -ENOENT; 2210 2211 /* 2212 * We acquire tbl->lock to be nice to the periodic timers and 2213 * make sure they always see a consistent set of values. 2214 */ 2215 write_lock_bh(&tbl->lock); 2216 2217 if (tb[NDTA_PARMS]) { 2218 struct nlattr *tbp[NDTPA_MAX+1]; 2219 struct neigh_parms *p; 2220 int i, ifindex = 0; 2221 2222 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], 2223 nl_ntbl_parm_policy, extack); 2224 if (err < 0) 2225 goto errout_tbl_lock; 2226 2227 if (tbp[NDTPA_IFINDEX]) 2228 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); 2229 2230 p = lookup_neigh_parms(tbl, net, ifindex); 2231 if (p == NULL) { 2232 err = -ENOENT; 2233 goto errout_tbl_lock; 2234 } 2235 2236 for (i = 1; i <= NDTPA_MAX; i++) { 2237 if (tbp[i] == NULL) 2238 continue; 2239 2240 switch (i) { 2241 case NDTPA_QUEUE_LEN: 2242 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, 2243 nla_get_u32(tbp[i]) * 2244 SKB_TRUESIZE(ETH_FRAME_LEN)); 2245 break; 2246 case NDTPA_QUEUE_LENBYTES: 2247 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, 2248 nla_get_u32(tbp[i])); 2249 break; 2250 case NDTPA_PROXY_QLEN: 2251 NEIGH_VAR_SET(p, PROXY_QLEN, 2252 nla_get_u32(tbp[i])); 2253 break; 2254 case NDTPA_APP_PROBES: 2255 NEIGH_VAR_SET(p, APP_PROBES, 2256 nla_get_u32(tbp[i])); 2257 break; 2258 case NDTPA_UCAST_PROBES: 2259 NEIGH_VAR_SET(p, UCAST_PROBES, 2260 nla_get_u32(tbp[i])); 2261 break; 2262 case NDTPA_MCAST_PROBES: 2263 NEIGH_VAR_SET(p, MCAST_PROBES, 2264 nla_get_u32(tbp[i])); 2265 break; 2266 case NDTPA_MCAST_REPROBES: 2267 NEIGH_VAR_SET(p, MCAST_REPROBES, 2268 nla_get_u32(tbp[i])); 2269 break; 2270 case NDTPA_BASE_REACHABLE_TIME: 2271 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, 2272 nla_get_msecs(tbp[i])); 2273 /* update reachable_time as well, otherwise, the change will 2274 * only be effective after the next time neigh_periodic_work 2275 * decides to recompute it (can be multiple minutes) 2276 */ 2277 p->reachable_time = 2278 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 2279 break; 2280 case NDTPA_GC_STALETIME: 2281 NEIGH_VAR_SET(p, GC_STALETIME, 2282 nla_get_msecs(tbp[i])); 2283 break; 2284 case NDTPA_DELAY_PROBE_TIME: 2285 NEIGH_VAR_SET(p, DELAY_PROBE_TIME, 2286 nla_get_msecs(tbp[i])); 2287 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 2288 break; 2289 case NDTPA_RETRANS_TIME: 2290 NEIGH_VAR_SET(p, RETRANS_TIME, 2291 nla_get_msecs(tbp[i])); 2292 break; 2293 case NDTPA_ANYCAST_DELAY: 2294 NEIGH_VAR_SET(p, ANYCAST_DELAY, 2295 nla_get_msecs(tbp[i])); 2296 break; 2297 case NDTPA_PROXY_DELAY: 2298 NEIGH_VAR_SET(p, PROXY_DELAY, 2299 nla_get_msecs(tbp[i])); 2300 break; 2301 case NDTPA_LOCKTIME: 2302 NEIGH_VAR_SET(p, LOCKTIME, 2303 nla_get_msecs(tbp[i])); 2304 break; 2305 } 2306 } 2307 } 2308 2309 err = -ENOENT; 2310 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || 2311 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && 2312 !net_eq(net, &init_net)) 2313 goto errout_tbl_lock; 2314 2315 if (tb[NDTA_THRESH1]) 2316 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); 2317 2318 if (tb[NDTA_THRESH2]) 2319 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); 2320 2321 if (tb[NDTA_THRESH3]) 2322 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); 2323 2324 if (tb[NDTA_GC_INTERVAL]) 2325 tbl->gc_interval = 
nla_get_msecs(tb[NDTA_GC_INTERVAL]); 2326 2327 err = 0; 2328 2329 errout_tbl_lock: 2330 write_unlock_bh(&tbl->lock); 2331 errout: 2332 return err; 2333 } 2334 2335 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh, 2336 struct netlink_ext_ack *extack) 2337 { 2338 struct ndtmsg *ndtm; 2339 2340 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) { 2341 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request"); 2342 return -EINVAL; 2343 } 2344 2345 ndtm = nlmsg_data(nlh); 2346 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) { 2347 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request"); 2348 return -EINVAL; 2349 } 2350 2351 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) { 2352 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request"); 2353 return -EINVAL; 2354 } 2355 2356 return 0; 2357 } 2358 2359 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2360 { 2361 const struct nlmsghdr *nlh = cb->nlh; 2362 struct net *net = sock_net(skb->sk); 2363 int family, tidx, nidx = 0; 2364 int tbl_skip = cb->args[0]; 2365 int neigh_skip = cb->args[1]; 2366 struct neigh_table *tbl; 2367 2368 if (cb->strict_check) { 2369 int err = neightbl_valid_dump_info(nlh, cb->extack); 2370 2371 if (err < 0) 2372 return err; 2373 } 2374 2375 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2376 2377 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { 2378 struct neigh_parms *p; 2379 2380 tbl = neigh_tables[tidx]; 2381 if (!tbl) 2382 continue; 2383 2384 if (tidx < tbl_skip || (family && tbl->family != family)) 2385 continue; 2386 2387 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, 2388 nlh->nlmsg_seq, RTM_NEWNEIGHTBL, 2389 NLM_F_MULTI) < 0) 2390 break; 2391 2392 nidx = 0; 2393 p = list_next_entry(&tbl->parms, list); 2394 list_for_each_entry_from(p, &tbl->parms_list, list) { 2395 if (!net_eq(neigh_parms_net(p), net)) 2396 continue; 2397 2398 if (nidx < neigh_skip) 2399 goto next; 2400 2401 if (neightbl_fill_param_info(skb, tbl, p, 2402 NETLINK_CB(cb->skb).portid, 2403 nlh->nlmsg_seq, 2404 RTM_NEWNEIGHTBL, 2405 NLM_F_MULTI) < 0) 2406 goto out; 2407 next: 2408 nidx++; 2409 } 2410 2411 neigh_skip = 0; 2412 } 2413 out: 2414 cb->args[0] = tidx; 2415 cb->args[1] = nidx; 2416 2417 return skb->len; 2418 } 2419 2420 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, 2421 u32 pid, u32 seq, int type, unsigned int flags) 2422 { 2423 unsigned long now = jiffies; 2424 struct nda_cacheinfo ci; 2425 struct nlmsghdr *nlh; 2426 struct ndmsg *ndm; 2427 2428 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2429 if (nlh == NULL) 2430 return -EMSGSIZE; 2431 2432 ndm = nlmsg_data(nlh); 2433 ndm->ndm_family = neigh->ops->family; 2434 ndm->ndm_pad1 = 0; 2435 ndm->ndm_pad2 = 0; 2436 ndm->ndm_flags = neigh->flags; 2437 ndm->ndm_type = neigh->type; 2438 ndm->ndm_ifindex = neigh->dev->ifindex; 2439 2440 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) 2441 goto nla_put_failure; 2442 2443 read_lock_bh(&neigh->lock); 2444 ndm->ndm_state = neigh->nud_state; 2445 if (neigh->nud_state & NUD_VALID) { 2446 char haddr[MAX_ADDR_LEN]; 2447 2448 neigh_ha_snapshot(haddr, neigh, neigh->dev); 2449 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { 2450 read_unlock_bh(&neigh->lock); 2451 goto nla_put_failure; 2452 } 2453 } 2454 2455 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2456 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); 2457 ci.ndm_updated = 
jiffies_to_clock_t(now - neigh->updated); 2458 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; 2459 read_unlock_bh(&neigh->lock); 2460 2461 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || 2462 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 2463 goto nla_put_failure; 2464 2465 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) 2466 goto nla_put_failure; 2467 2468 nlmsg_end(skb, nlh); 2469 return 0; 2470 2471 nla_put_failure: 2472 nlmsg_cancel(skb, nlh); 2473 return -EMSGSIZE; 2474 } 2475 2476 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, 2477 u32 pid, u32 seq, int type, unsigned int flags, 2478 struct neigh_table *tbl) 2479 { 2480 struct nlmsghdr *nlh; 2481 struct ndmsg *ndm; 2482 2483 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); 2484 if (nlh == NULL) 2485 return -EMSGSIZE; 2486 2487 ndm = nlmsg_data(nlh); 2488 ndm->ndm_family = tbl->family; 2489 ndm->ndm_pad1 = 0; 2490 ndm->ndm_pad2 = 0; 2491 ndm->ndm_flags = pn->flags | NTF_PROXY; 2492 ndm->ndm_type = RTN_UNICAST; 2493 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; 2494 ndm->ndm_state = NUD_NONE; 2495 2496 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) 2497 goto nla_put_failure; 2498 2499 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) 2500 goto nla_put_failure; 2501 2502 nlmsg_end(skb, nlh); 2503 return 0; 2504 2505 nla_put_failure: 2506 nlmsg_cancel(skb, nlh); 2507 return -EMSGSIZE; 2508 } 2509 2510 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) 2511 { 2512 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); 2513 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); 2514 } 2515 2516 static bool neigh_master_filtered(struct net_device *dev, int master_idx) 2517 { 2518 struct net_device *master; 2519 2520 if (!master_idx) 2521 return false; 2522 2523 master = dev ? 
netdev_master_upper_dev_get(dev) : NULL; 2524 if (!master || master->ifindex != master_idx) 2525 return true; 2526 2527 return false; 2528 } 2529 2530 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) 2531 { 2532 if (filter_idx && (!dev || dev->ifindex != filter_idx)) 2533 return true; 2534 2535 return false; 2536 } 2537 2538 struct neigh_dump_filter { 2539 int master_idx; 2540 int dev_idx; 2541 }; 2542 2543 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2544 struct netlink_callback *cb, 2545 struct neigh_dump_filter *filter) 2546 { 2547 struct net *net = sock_net(skb->sk); 2548 struct neighbour *n; 2549 int rc, h, s_h = cb->args[1]; 2550 int idx, s_idx = idx = cb->args[2]; 2551 struct neigh_hash_table *nht; 2552 unsigned int flags = NLM_F_MULTI; 2553 2554 if (filter->dev_idx || filter->master_idx) 2555 flags |= NLM_F_DUMP_FILTERED; 2556 2557 rcu_read_lock_bh(); 2558 nht = rcu_dereference_bh(tbl->nht); 2559 2560 for (h = s_h; h < (1 << nht->hash_shift); h++) { 2561 if (h > s_h) 2562 s_idx = 0; 2563 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; 2564 n != NULL; 2565 n = rcu_dereference_bh(n->next)) { 2566 if (idx < s_idx || !net_eq(dev_net(n->dev), net)) 2567 goto next; 2568 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2569 neigh_master_filtered(n->dev, filter->master_idx)) 2570 goto next; 2571 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2572 cb->nlh->nlmsg_seq, 2573 RTM_NEWNEIGH, 2574 flags) < 0) { 2575 rc = -1; 2576 goto out; 2577 } 2578 next: 2579 idx++; 2580 } 2581 } 2582 rc = skb->len; 2583 out: 2584 rcu_read_unlock_bh(); 2585 cb->args[1] = h; 2586 cb->args[2] = idx; 2587 return rc; 2588 } 2589 2590 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2591 struct netlink_callback *cb, 2592 struct neigh_dump_filter *filter) 2593 { 2594 struct pneigh_entry *n; 2595 struct net *net = sock_net(skb->sk); 2596 int rc, h, s_h = cb->args[3]; 2597 int idx, s_idx = idx = cb->args[4]; 2598 unsigned int flags = NLM_F_MULTI; 2599 2600 if (filter->dev_idx || filter->master_idx) 2601 flags |= NLM_F_DUMP_FILTERED; 2602 2603 read_lock_bh(&tbl->lock); 2604 2605 for (h = s_h; h <= PNEIGH_HASHMASK; h++) { 2606 if (h > s_h) 2607 s_idx = 0; 2608 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { 2609 if (idx < s_idx || pneigh_net(n) != net) 2610 goto next; 2611 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || 2612 neigh_master_filtered(n->dev, filter->master_idx)) 2613 goto next; 2614 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2615 cb->nlh->nlmsg_seq, 2616 RTM_NEWNEIGH, flags, tbl) < 0) { 2617 read_unlock_bh(&tbl->lock); 2618 rc = -1; 2619 goto out; 2620 } 2621 next: 2622 idx++; 2623 } 2624 } 2625 2626 read_unlock_bh(&tbl->lock); 2627 rc = skb->len; 2628 out: 2629 cb->args[3] = h; 2630 cb->args[4] = idx; 2631 return rc; 2632 2633 } 2634 2635 static int neigh_valid_dump_req(const struct nlmsghdr *nlh, 2636 bool strict_check, 2637 struct neigh_dump_filter *filter, 2638 struct netlink_ext_ack *extack) 2639 { 2640 struct nlattr *tb[NDA_MAX + 1]; 2641 int err, i; 2642 2643 if (strict_check) { 2644 struct ndmsg *ndm; 2645 2646 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2647 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); 2648 return -EINVAL; 2649 } 2650 2651 ndm = nlmsg_data(nlh); 2652 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || 2653 ndm->ndm_state || ndm->ndm_type) { 2654 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor 
dump request"); 2655 return -EINVAL; 2656 } 2657 2658 if (ndm->ndm_flags & ~NTF_PROXY) { 2659 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); 2660 return -EINVAL; 2661 } 2662 2663 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, 2664 nda_policy, extack); 2665 } else { 2666 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, 2667 nda_policy, extack); 2668 } 2669 if (err < 0) 2670 return err; 2671 2672 for (i = 0; i <= NDA_MAX; ++i) { 2673 if (!tb[i]) 2674 continue; 2675 2676 /* all new attributes should require strict_check */ 2677 switch (i) { 2678 case NDA_IFINDEX: 2679 filter->dev_idx = nla_get_u32(tb[i]); 2680 break; 2681 case NDA_MASTER: 2682 filter->master_idx = nla_get_u32(tb[i]); 2683 break; 2684 default: 2685 if (strict_check) { 2686 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request"); 2687 return -EINVAL; 2688 } 2689 } 2690 } 2691 2692 return 0; 2693 } 2694 2695 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2696 { 2697 const struct nlmsghdr *nlh = cb->nlh; 2698 struct neigh_dump_filter filter = {}; 2699 struct neigh_table *tbl; 2700 int t, family, s_t; 2701 int proxy = 0; 2702 int err; 2703 2704 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; 2705 2706 /* check for full ndmsg structure presence, family member is 2707 * the same for both structures 2708 */ 2709 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && 2710 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) 2711 proxy = 1; 2712 2713 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); 2714 if (err < 0 && cb->strict_check) 2715 return err; 2716 2717 s_t = cb->args[0]; 2718 2719 for (t = 0; t < NEIGH_NR_TABLES; t++) { 2720 tbl = neigh_tables[t]; 2721 2722 if (!tbl) 2723 continue; 2724 if (t < s_t || (family && tbl->family != family)) 2725 continue; 2726 if (t > s_t) 2727 memset(&cb->args[1], 0, sizeof(cb->args) - 2728 sizeof(cb->args[0])); 2729 if (proxy) 2730 err = pneigh_dump_table(tbl, skb, cb, &filter); 2731 else 2732 err = neigh_dump_table(tbl, skb, cb, &filter); 2733 if (err < 0) 2734 break; 2735 } 2736 2737 cb->args[0] = t; 2738 return skb->len; 2739 } 2740 2741 static int neigh_valid_get_req(const struct nlmsghdr *nlh, 2742 struct neigh_table **tbl, 2743 void **dst, int *dev_idx, u8 *ndm_flags, 2744 struct netlink_ext_ack *extack) 2745 { 2746 struct nlattr *tb[NDA_MAX + 1]; 2747 struct ndmsg *ndm; 2748 int err, i; 2749 2750 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { 2751 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request"); 2752 return -EINVAL; 2753 } 2754 2755 ndm = nlmsg_data(nlh); 2756 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || 2757 ndm->ndm_type) { 2758 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request"); 2759 return -EINVAL; 2760 } 2761 2762 if (ndm->ndm_flags & ~NTF_PROXY) { 2763 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request"); 2764 return -EINVAL; 2765 } 2766 2767 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, 2768 nda_policy, extack); 2769 if (err < 0) 2770 return err; 2771 2772 *ndm_flags = ndm->ndm_flags; 2773 *dev_idx = ndm->ndm_ifindex; 2774 *tbl = neigh_find_table(ndm->ndm_family); 2775 if (*tbl == NULL) { 2776 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request"); 2777 return -EAFNOSUPPORT; 2778 } 2779 2780 for (i = 0; i <= NDA_MAX; ++i) { 2781 if (!tb[i]) 2782 continue; 2783 2784 switch (i) { 2785 case NDA_DST: 2786 if (nla_len(tb[i]) != 
(int)(*tbl)->key_len) { 2787 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request"); 2788 return -EINVAL; 2789 } 2790 *dst = nla_data(tb[i]); 2791 break; 2792 default: 2793 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request"); 2794 return -EINVAL; 2795 } 2796 } 2797 2798 return 0; 2799 } 2800 2801 static inline size_t neigh_nlmsg_size(void) 2802 { 2803 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2804 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2805 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ 2806 + nla_total_size(sizeof(struct nda_cacheinfo)) 2807 + nla_total_size(4) /* NDA_PROBES */ 2808 + nla_total_size(1); /* NDA_PROTOCOL */ 2809 } 2810 2811 static int neigh_get_reply(struct net *net, struct neighbour *neigh, 2812 u32 pid, u32 seq) 2813 { 2814 struct sk_buff *skb; 2815 int err = 0; 2816 2817 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL); 2818 if (!skb) 2819 return -ENOBUFS; 2820 2821 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0); 2822 if (err) { 2823 kfree_skb(skb); 2824 goto errout; 2825 } 2826 2827 err = rtnl_unicast(skb, net, pid); 2828 errout: 2829 return err; 2830 } 2831 2832 static inline size_t pneigh_nlmsg_size(void) 2833 { 2834 return NLMSG_ALIGN(sizeof(struct ndmsg)) 2835 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ 2836 + nla_total_size(1); /* NDA_PROTOCOL */ 2837 } 2838 2839 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh, 2840 u32 pid, u32 seq, struct neigh_table *tbl) 2841 { 2842 struct sk_buff *skb; 2843 int err = 0; 2844 2845 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL); 2846 if (!skb) 2847 return -ENOBUFS; 2848 2849 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl); 2850 if (err) { 2851 kfree_skb(skb); 2852 goto errout; 2853 } 2854 2855 err = rtnl_unicast(skb, net, pid); 2856 errout: 2857 return err; 2858 } 2859 2860 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2861 struct netlink_ext_ack *extack) 2862 { 2863 struct net *net = sock_net(in_skb->sk); 2864 struct net_device *dev = NULL; 2865 struct neigh_table *tbl = NULL; 2866 struct neighbour *neigh; 2867 void *dst = NULL; 2868 u8 ndm_flags = 0; 2869 int dev_idx = 0; 2870 int err; 2871 2872 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags, 2873 extack); 2874 if (err < 0) 2875 return err; 2876 2877 if (dev_idx) { 2878 dev = __dev_get_by_index(net, dev_idx); 2879 if (!dev) { 2880 NL_SET_ERR_MSG(extack, "Unknown device ifindex"); 2881 return -ENODEV; 2882 } 2883 } 2884 2885 if (!dst) { 2886 NL_SET_ERR_MSG(extack, "Network address not specified"); 2887 return -EINVAL; 2888 } 2889 2890 if (ndm_flags & NTF_PROXY) { 2891 struct pneigh_entry *pn; 2892 2893 pn = pneigh_lookup(tbl, net, dst, dev, 0); 2894 if (!pn) { 2895 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found"); 2896 return -ENOENT; 2897 } 2898 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid, 2899 nlh->nlmsg_seq, tbl); 2900 } 2901 2902 if (!dev) { 2903 NL_SET_ERR_MSG(extack, "No device specified"); 2904 return -EINVAL; 2905 } 2906 2907 neigh = neigh_lookup(tbl, dst, dev); 2908 if (!neigh) { 2909 NL_SET_ERR_MSG(extack, "Neighbour entry not found"); 2910 return -ENOENT; 2911 } 2912 2913 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid, 2914 nlh->nlmsg_seq); 2915 2916 neigh_release(neigh); 2917 2918 return err; 2919 } 2920 2921 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2922 { 2923 int chain; 2924 struct neigh_hash_table *nht; 2925 2926 
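/*
 * The walk below runs under rcu_read_lock_bh() with tbl->lock held as
 * a reader, so @cb must not sleep, must not take tbl->lock and must
 * not unlink entries (use __neigh_for_each_release() for that). A
 * minimal caller sketch (hypothetical; assumes the IPv4 arp_tbl):
 *
 *	static void count_neigh(struct neighbour *n, void *cookie)
 *	{
 *		(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int count = 0;
 *	neigh_for_each(&arp_tbl, count_neigh, &count);
 */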
rcu_read_lock_bh(); 2927 nht = rcu_dereference_bh(tbl->nht); 2928 2929 read_lock(&tbl->lock); /* avoid resizes */ 2930 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2931 struct neighbour *n; 2932 2933 for (n = rcu_dereference_bh(nht->hash_buckets[chain]); 2934 n != NULL; 2935 n = rcu_dereference_bh(n->next)) 2936 cb(n, cookie); 2937 } 2938 read_unlock(&tbl->lock); 2939 rcu_read_unlock_bh(); 2940 } 2941 EXPORT_SYMBOL(neigh_for_each); 2942 2943 /* The tbl->lock must be held as a writer and BH disabled. */ 2944 void __neigh_for_each_release(struct neigh_table *tbl, 2945 int (*cb)(struct neighbour *)) 2946 { 2947 int chain; 2948 struct neigh_hash_table *nht; 2949 2950 nht = rcu_dereference_protected(tbl->nht, 2951 lockdep_is_held(&tbl->lock)); 2952 for (chain = 0; chain < (1 << nht->hash_shift); chain++) { 2953 struct neighbour *n; 2954 struct neighbour __rcu **np; 2955 2956 np = &nht->hash_buckets[chain]; 2957 while ((n = rcu_dereference_protected(*np, 2958 lockdep_is_held(&tbl->lock))) != NULL) { 2959 int release; 2960 2961 write_lock(&n->lock); 2962 release = cb(n); 2963 if (release) { 2964 rcu_assign_pointer(*np, 2965 rcu_dereference_protected(n->next, 2966 lockdep_is_held(&tbl->lock))); 2967 neigh_mark_dead(n); 2968 } else 2969 np = &n->next; 2970 write_unlock(&n->lock); 2971 if (release) 2972 neigh_cleanup_and_release(n); 2973 } 2974 } 2975 } 2976 EXPORT_SYMBOL(__neigh_for_each_release); 2977 2978 int neigh_xmit(int index, struct net_device *dev, 2979 const void *addr, struct sk_buff *skb) 2980 { 2981 int err = -EAFNOSUPPORT; 2982 if (likely(index < NEIGH_NR_TABLES)) { 2983 struct neigh_table *tbl; 2984 struct neighbour *neigh; 2985 2986 tbl = neigh_tables[index]; 2987 if (!tbl) 2988 goto out; 2989 rcu_read_lock_bh(); 2990 neigh = __neigh_lookup_noref(tbl, addr, dev); 2991 if (!neigh) 2992 neigh = __neigh_create(tbl, addr, dev, false); 2993 err = PTR_ERR(neigh); 2994 if (IS_ERR(neigh)) { 2995 rcu_read_unlock_bh(); 2996 goto out_kfree_skb; 2997 } 2998 err = neigh->output(neigh, skb); 2999 rcu_read_unlock_bh(); 3000 } 3001 else if (index == NEIGH_LINK_TABLE) { 3002 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 3003 addr, NULL, skb->len); 3004 if (err < 0) 3005 goto out_kfree_skb; 3006 err = dev_queue_xmit(skb); 3007 } 3008 out: 3009 return err; 3010 out_kfree_skb: 3011 kfree_skb(skb); 3012 goto out; 3013 } 3014 EXPORT_SYMBOL(neigh_xmit); 3015 3016 #ifdef CONFIG_PROC_FS 3017 3018 static struct neighbour *neigh_get_first(struct seq_file *seq) 3019 { 3020 struct neigh_seq_state *state = seq->private; 3021 struct net *net = seq_file_net(seq); 3022 struct neigh_hash_table *nht = state->nht; 3023 struct neighbour *n = NULL; 3024 int bucket = state->bucket; 3025 3026 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 3027 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { 3028 n = rcu_dereference_bh(nht->hash_buckets[bucket]); 3029 3030 while (n) { 3031 if (!net_eq(dev_net(n->dev), net)) 3032 goto next; 3033 if (state->neigh_sub_iter) { 3034 loff_t fakep = 0; 3035 void *v; 3036 3037 v = state->neigh_sub_iter(state, n, &fakep); 3038 if (!v) 3039 goto next; 3040 } 3041 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 3042 break; 3043 if (n->nud_state & ~NUD_NOARP) 3044 break; 3045 next: 3046 n = rcu_dereference_bh(n->next); 3047 } 3048 3049 if (n) 3050 break; 3051 } 3052 state->bucket = bucket; 3053 3054 return n; 3055 } 3056 3057 static struct neighbour *neigh_get_next(struct seq_file *seq, 3058 struct neighbour *n, 3059 loff_t *pos) 3060 { 3061 struct neigh_seq_state *state = 
seq->private; 3062 struct net *net = seq_file_net(seq); 3063 struct neigh_hash_table *nht = state->nht; 3064 3065 if (state->neigh_sub_iter) { 3066 void *v = state->neigh_sub_iter(state, n, pos); 3067 if (v) 3068 return n; 3069 } 3070 n = rcu_dereference_bh(n->next); 3071 3072 while (1) { 3073 while (n) { 3074 if (!net_eq(dev_net(n->dev), net)) 3075 goto next; 3076 if (state->neigh_sub_iter) { 3077 void *v = state->neigh_sub_iter(state, n, pos); 3078 if (v) 3079 return n; 3080 goto next; 3081 } 3082 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) 3083 break; 3084 3085 if (n->nud_state & ~NUD_NOARP) 3086 break; 3087 next: 3088 n = rcu_dereference_bh(n->next); 3089 } 3090 3091 if (n) 3092 break; 3093 3094 if (++state->bucket >= (1 << nht->hash_shift)) 3095 break; 3096 3097 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); 3098 } 3099 3100 if (n && pos) 3101 --(*pos); 3102 return n; 3103 } 3104 3105 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) 3106 { 3107 struct neighbour *n = neigh_get_first(seq); 3108 3109 if (n) { 3110 --(*pos); 3111 while (*pos) { 3112 n = neigh_get_next(seq, n, pos); 3113 if (!n) 3114 break; 3115 } 3116 } 3117 return *pos ? NULL : n; 3118 } 3119 3120 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 3121 { 3122 struct neigh_seq_state *state = seq->private; 3123 struct net *net = seq_file_net(seq); 3124 struct neigh_table *tbl = state->tbl; 3125 struct pneigh_entry *pn = NULL; 3126 int bucket = state->bucket; 3127 3128 state->flags |= NEIGH_SEQ_IS_PNEIGH; 3129 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 3130 pn = tbl->phash_buckets[bucket]; 3131 while (pn && !net_eq(pneigh_net(pn), net)) 3132 pn = pn->next; 3133 if (pn) 3134 break; 3135 } 3136 state->bucket = bucket; 3137 3138 return pn; 3139 } 3140 3141 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, 3142 struct pneigh_entry *pn, 3143 loff_t *pos) 3144 { 3145 struct neigh_seq_state *state = seq->private; 3146 struct net *net = seq_file_net(seq); 3147 struct neigh_table *tbl = state->tbl; 3148 3149 do { 3150 pn = pn->next; 3151 } while (pn && !net_eq(pneigh_net(pn), net)); 3152 3153 while (!pn) { 3154 if (++state->bucket > PNEIGH_HASHMASK) 3155 break; 3156 pn = tbl->phash_buckets[state->bucket]; 3157 while (pn && !net_eq(pneigh_net(pn), net)) 3158 pn = pn->next; 3159 if (pn) 3160 break; 3161 } 3162 3163 if (pn && pos) 3164 --(*pos); 3165 3166 return pn; 3167 } 3168 3169 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) 3170 { 3171 struct pneigh_entry *pn = pneigh_get_first(seq); 3172 3173 if (pn) { 3174 --(*pos); 3175 while (*pos) { 3176 pn = pneigh_get_next(seq, pn, pos); 3177 if (!pn) 3178 break; 3179 } 3180 } 3181 return *pos ? NULL : pn; 3182 } 3183 3184 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) 3185 { 3186 struct neigh_seq_state *state = seq->private; 3187 void *rc; 3188 loff_t idxpos = *pos; 3189 3190 rc = neigh_get_idx(seq, &idxpos); 3191 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3192 rc = pneigh_get_idx(seq, &idxpos); 3193 3194 return rc; 3195 } 3196 3197 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 3198 __acquires(rcu_bh) 3199 { 3200 struct neigh_seq_state *state = seq->private; 3201 3202 state->tbl = tbl; 3203 state->bucket = 0; 3204 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 3205 3206 rcu_read_lock_bh(); 3207 state->nht = rcu_dereference_bh(tbl->nht); 3208 3209 return *pos ? 
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 3210 } 3211 EXPORT_SYMBOL(neigh_seq_start); 3212 3213 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3214 { 3215 struct neigh_seq_state *state; 3216 void *rc; 3217 3218 if (v == SEQ_START_TOKEN) { 3219 rc = neigh_get_first(seq); 3220 goto out; 3221 } 3222 3223 state = seq->private; 3224 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { 3225 rc = neigh_get_next(seq, v, NULL); 3226 if (rc) 3227 goto out; 3228 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) 3229 rc = pneigh_get_first(seq); 3230 } else { 3231 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); 3232 rc = pneigh_get_next(seq, v, NULL); 3233 } 3234 out: 3235 ++(*pos); 3236 return rc; 3237 } 3238 EXPORT_SYMBOL(neigh_seq_next); 3239 3240 void neigh_seq_stop(struct seq_file *seq, void *v) 3241 __releases(rcu_bh) 3242 { 3243 rcu_read_unlock_bh(); 3244 } 3245 EXPORT_SYMBOL(neigh_seq_stop); 3246 3247 /* statistics via seq_file */ 3248 3249 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 3250 { 3251 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3252 int cpu; 3253 3254 if (*pos == 0) 3255 return SEQ_START_TOKEN; 3256 3257 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 3258 if (!cpu_possible(cpu)) 3259 continue; 3260 *pos = cpu+1; 3261 return per_cpu_ptr(tbl->stats, cpu); 3262 } 3263 return NULL; 3264 } 3265 3266 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3267 { 3268 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3269 int cpu; 3270 3271 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 3272 if (!cpu_possible(cpu)) 3273 continue; 3274 *pos = cpu+1; 3275 return per_cpu_ptr(tbl->stats, cpu); 3276 } 3277 return NULL; 3278 } 3279 3280 static void neigh_stat_seq_stop(struct seq_file *seq, void *v) 3281 { 3282 3283 } 3284 3285 static int neigh_stat_seq_show(struct seq_file *seq, void *v) 3286 { 3287 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); 3288 struct neigh_statistics *st = v; 3289 3290 if (v == SEQ_START_TOKEN) { 3291 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); 3292 return 0; 3293 } 3294 3295 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " 3296 "%08lx %08lx %08lx %08lx %08lx %08lx\n", 3297 atomic_read(&tbl->entries), 3298 3299 st->allocs, 3300 st->destroys, 3301 st->hash_grows, 3302 3303 st->lookups, 3304 st->hits, 3305 3306 st->res_failed, 3307 3308 st->rcv_probes_mcast, 3309 st->rcv_probes_ucast, 3310 3311 st->periodic_gc_runs, 3312 st->forced_gc_runs, 3313 st->unres_discards, 3314 st->table_fulls 3315 ); 3316 3317 return 0; 3318 } 3319 3320 static const struct seq_operations neigh_stat_seq_ops = { 3321 .start = neigh_stat_seq_start, 3322 .next = neigh_stat_seq_next, 3323 .stop = neigh_stat_seq_stop, 3324 .show = neigh_stat_seq_show, 3325 }; 3326 #endif /* CONFIG_PROC_FS */ 3327 3328 static void __neigh_notify(struct neighbour *n, int type, int flags, 3329 u32 pid) 3330 { 3331 struct net *net = dev_net(n->dev); 3332 struct sk_buff *skb; 3333 int err = -ENOBUFS; 3334 3335 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); 3336 if (skb == NULL) 3337 goto errout; 3338 3339 err = neigh_fill_info(skb, n, pid, 0, type, flags); 3340 if (err < 0) { 3341 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ 3342 WARN_ON(err == -EMSGSIZE); 3343 kfree_skb(skb); 3344 goto errout; 3345 } 3346 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 3347 return; 3348 errout: 
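/* Allocation or fill failure: record the error against the
 * RTNLGRP_NEIGH multicast group so listeners see it on their socket
 * instead of silently missing the notification.
 */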
3349 if (err < 0) 3350 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 3351 } 3352 3353 void neigh_app_ns(struct neighbour *n) 3354 { 3355 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); 3356 } 3357 EXPORT_SYMBOL(neigh_app_ns); 3358 3359 #ifdef CONFIG_SYSCTL 3360 static int zero; 3361 static int int_max = INT_MAX; 3362 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); 3363 3364 static int proc_unres_qlen(struct ctl_table *ctl, int write, 3365 void __user *buffer, size_t *lenp, loff_t *ppos) 3366 { 3367 int size, ret; 3368 struct ctl_table tmp = *ctl; 3369 3370 tmp.extra1 = &zero; 3371 tmp.extra2 = &unres_qlen_max; 3372 tmp.data = &size; 3373 3374 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); 3375 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3376 3377 if (write && !ret) 3378 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); 3379 return ret; 3380 } 3381 3382 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, 3383 int family) 3384 { 3385 switch (family) { 3386 case AF_INET: 3387 return __in_dev_arp_parms_get_rcu(dev); 3388 case AF_INET6: 3389 return __in6_dev_nd_parms_get_rcu(dev); 3390 } 3391 return NULL; 3392 } 3393 3394 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, 3395 int index) 3396 { 3397 struct net_device *dev; 3398 int family = neigh_parms_family(p); 3399 3400 rcu_read_lock(); 3401 for_each_netdev_rcu(net, dev) { 3402 struct neigh_parms *dst_p = 3403 neigh_get_dev_parms_rcu(dev, family); 3404 3405 if (dst_p && !test_bit(index, dst_p->data_state)) 3406 dst_p->data[index] = p->data[index]; 3407 } 3408 rcu_read_unlock(); 3409 } 3410 3411 static void neigh_proc_update(struct ctl_table *ctl, int write) 3412 { 3413 struct net_device *dev = ctl->extra1; 3414 struct neigh_parms *p = ctl->extra2; 3415 struct net *net = neigh_parms_net(p); 3416 int index = (int *) ctl->data - p->data; 3417 3418 if (!write) 3419 return; 3420 3421 set_bit(index, p->data_state); 3422 if (index == NEIGH_VAR_DELAY_PROBE_TIME) 3423 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); 3424 if (!dev) /* NULL dev means this is default value */ 3425 neigh_copy_dflt_parms(net, p, index); 3426 } 3427 3428 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, 3429 void __user *buffer, 3430 size_t *lenp, loff_t *ppos) 3431 { 3432 struct ctl_table tmp = *ctl; 3433 int ret; 3434 3435 tmp.extra1 = &zero; 3436 tmp.extra2 = &int_max; 3437 3438 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 3439 neigh_proc_update(ctl, write); 3440 return ret; 3441 } 3442 3443 int neigh_proc_dointvec(struct ctl_table *ctl, int write, 3444 void __user *buffer, size_t *lenp, loff_t *ppos) 3445 { 3446 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 3447 3448 neigh_proc_update(ctl, write); 3449 return ret; 3450 } 3451 EXPORT_SYMBOL(neigh_proc_dointvec); 3452 3453 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, 3454 void __user *buffer, 3455 size_t *lenp, loff_t *ppos) 3456 { 3457 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3458 3459 neigh_proc_update(ctl, write); 3460 return ret; 3461 } 3462 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); 3463 3464 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, 3465 void __user *buffer, 3466 size_t *lenp, loff_t *ppos) 3467 { 3468 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); 3469 3470 neigh_proc_update(ctl, write); 3471 return ret; 3472 } 3473 3474 int 
neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, 3475 void __user *buffer, 3476 size_t *lenp, loff_t *ppos) 3477 { 3478 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3479 3480 neigh_proc_update(ctl, write); 3481 return ret; 3482 } 3483 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); 3484 3485 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, 3486 void __user *buffer, 3487 size_t *lenp, loff_t *ppos) 3488 { 3489 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); 3490 3491 neigh_proc_update(ctl, write); 3492 return ret; 3493 } 3494 3495 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, 3496 void __user *buffer, 3497 size_t *lenp, loff_t *ppos) 3498 { 3499 struct neigh_parms *p = ctl->extra2; 3500 int ret; 3501 3502 if (strcmp(ctl->procname, "base_reachable_time") == 0) 3503 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); 3504 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) 3505 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); 3506 else 3507 ret = -1; 3508 3509 if (write && ret == 0) { 3510 /* update reachable_time as well, otherwise, the change will 3511 * only be effective after the next time neigh_periodic_work 3512 * decides to recompute it 3513 */ 3514 p->reachable_time = 3515 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); 3516 } 3517 return ret; 3518 } 3519 3520 #define NEIGH_PARMS_DATA_OFFSET(index) \ 3521 (&((struct neigh_parms *) 0)->data[index]) 3522 3523 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ 3524 [NEIGH_VAR_ ## attr] = { \ 3525 .procname = name, \ 3526 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ 3527 .maxlen = sizeof(int), \ 3528 .mode = mval, \ 3529 .proc_handler = proc, \ 3530 } 3531 3532 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ 3533 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) 3534 3535 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ 3536 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) 3537 3538 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ 3539 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) 3540 3541 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ 3542 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 3543 3544 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ 3545 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) 3546 3547 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ 3548 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) 3549 3550 static struct neigh_sysctl_table { 3551 struct ctl_table_header *sysctl_header; 3552 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; 3553 } neigh_sysctl_template __read_mostly = { 3554 .neigh_vars = { 3555 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), 3556 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), 3557 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), 3558 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), 3559 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), 3560 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), 3561 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), 3562 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), 3563 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, 
"unres_qlen_bytes"), 3564 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), 3565 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), 3566 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), 3567 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), 3568 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), 3569 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), 3570 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), 3571 [NEIGH_VAR_GC_INTERVAL] = { 3572 .procname = "gc_interval", 3573 .maxlen = sizeof(int), 3574 .mode = 0644, 3575 .proc_handler = proc_dointvec_jiffies, 3576 }, 3577 [NEIGH_VAR_GC_THRESH1] = { 3578 .procname = "gc_thresh1", 3579 .maxlen = sizeof(int), 3580 .mode = 0644, 3581 .extra1 = &zero, 3582 .extra2 = &int_max, 3583 .proc_handler = proc_dointvec_minmax, 3584 }, 3585 [NEIGH_VAR_GC_THRESH2] = { 3586 .procname = "gc_thresh2", 3587 .maxlen = sizeof(int), 3588 .mode = 0644, 3589 .extra1 = &zero, 3590 .extra2 = &int_max, 3591 .proc_handler = proc_dointvec_minmax, 3592 }, 3593 [NEIGH_VAR_GC_THRESH3] = { 3594 .procname = "gc_thresh3", 3595 .maxlen = sizeof(int), 3596 .mode = 0644, 3597 .extra1 = &zero, 3598 .extra2 = &int_max, 3599 .proc_handler = proc_dointvec_minmax, 3600 }, 3601 {}, 3602 }, 3603 }; 3604 3605 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 3606 proc_handler *handler) 3607 { 3608 int i; 3609 struct neigh_sysctl_table *t; 3610 const char *dev_name_source; 3611 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; 3612 char *p_name; 3613 3614 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); 3615 if (!t) 3616 goto err; 3617 3618 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { 3619 t->neigh_vars[i].data += (long) p; 3620 t->neigh_vars[i].extra1 = dev; 3621 t->neigh_vars[i].extra2 = p; 3622 } 3623 3624 if (dev) { 3625 dev_name_source = dev->name; 3626 /* Terminate the table early */ 3627 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, 3628 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); 3629 } else { 3630 struct neigh_table *tbl = p->tbl; 3631 dev_name_source = "default"; 3632 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; 3633 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; 3634 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; 3635 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; 3636 } 3637 3638 if (handler) { 3639 /* RetransTime */ 3640 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; 3641 /* ReachableTime */ 3642 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; 3643 /* RetransTime (in milliseconds)*/ 3644 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; 3645 /* ReachableTime (in milliseconds) */ 3646 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; 3647 } else { 3648 /* Those handlers will update p->reachable_time after 3649 * base_reachable_time(_ms) is set to ensure the new timer starts being 3650 * applied after the next neighbour update instead of waiting for 3651 * neigh_periodic_work to update its value (can be multiple minutes) 3652 * So any handler that replaces them should do this as well 3653 */ 3654 /* ReachableTime */ 3655 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = 3656 neigh_proc_base_reachable_time; 3657 /* ReachableTime (in milliseconds) */ 3658 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = 
3659 neigh_proc_base_reachable_time; 3660 } 3661 3662 /* Don't export sysctls to unprivileged users */ 3663 if (neigh_parms_net(p)->user_ns != &init_user_ns) 3664 t->neigh_vars[0].procname = NULL; 3665 3666 switch (neigh_parms_family(p)) { 3667 case AF_INET: 3668 p_name = "ipv4"; 3669 break; 3670 case AF_INET6: 3671 p_name = "ipv6"; 3672 break; 3673 default: 3674 BUG(); 3675 } 3676 3677 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", 3678 p_name, dev_name_source); 3679 t->sysctl_header = 3680 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); 3681 if (!t->sysctl_header) 3682 goto free; 3683 3684 p->sysctl_table = t; 3685 return 0; 3686 3687 free: 3688 kfree(t); 3689 err: 3690 return -ENOBUFS; 3691 } 3692 EXPORT_SYMBOL(neigh_sysctl_register); 3693 3694 void neigh_sysctl_unregister(struct neigh_parms *p) 3695 { 3696 if (p->sysctl_table) { 3697 struct neigh_sysctl_table *t = p->sysctl_table; 3698 p->sysctl_table = NULL; 3699 unregister_net_sysctl_table(t->sysctl_header); 3700 kfree(t); 3701 } 3702 } 3703 EXPORT_SYMBOL(neigh_sysctl_unregister); 3704 3705 #endif /* CONFIG_SYSCTL */ 3706 3707 static int __init neigh_init(void) 3708 { 3709 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); 3710 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); 3711 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0); 3712 3713 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 3714 0); 3715 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); 3716 3717 return 0; 3718 } 3719 3720 subsys_initcall(neigh_init); 3721
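/*
 * Usage sketch (illustrative only; example_attach is hypothetical): a
 * protocol hooks its per-device tuning knobs into the template above
 * by calling neigh_sysctl_register() once it has allocated neigh_parms
 * for a device, much as IPv4 does for arp_tbl. Passing a NULL handler
 * keeps the default base_reachable_time(_ms) handlers installed above,
 * so writes also refresh p->reachable_time immediately:
 *
 *	static int example_attach(struct net_device *dev)
 *	{
 *		struct neigh_parms *p;
 *
 *		p = neigh_parms_alloc(dev, &arp_tbl);
 *		if (!p)
 *			return -ENOMEM;
 *		return neigh_sysctl_register(dev, p, NULL);
 *	}
 *
 * This publishes entries such as net/ipv4/neigh/<dev>/base_reachable_time_ms.
 */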