1 /* 2 * Anycast support for IPv6 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * David L Stevens (dlstevens@us.ibm.com) 7 * 8 * based heavily on net/ipv6/mcast.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 #include <linux/capability.h> 17 #include <linux/module.h> 18 #include <linux/errno.h> 19 #include <linux/types.h> 20 #include <linux/random.h> 21 #include <linux/string.h> 22 #include <linux/socket.h> 23 #include <linux/sockios.h> 24 #include <linux/net.h> 25 #include <linux/in6.h> 26 #include <linux/netdevice.h> 27 #include <linux/if_arp.h> 28 #include <linux/route.h> 29 #include <linux/init.h> 30 #include <linux/proc_fs.h> 31 #include <linux/seq_file.h> 32 #include <linux/slab.h> 33 34 #include <net/net_namespace.h> 35 #include <net/sock.h> 36 #include <net/snmp.h> 37 38 #include <net/ipv6.h> 39 #include <net/protocol.h> 40 #include <net/if_inet6.h> 41 #include <net/ndisc.h> 42 #include <net/addrconf.h> 43 #include <net/ip6_route.h> 44 45 #include <net/checksum.h> 46 47 #define IN6_ADDR_HSIZE_SHIFT 8 48 #define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) 49 /* anycast address hash table 50 */ 51 static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; 52 static DEFINE_SPINLOCK(acaddr_hash_lock); 53 54 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); 55 56 static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) 57 { 58 u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); 59 60 return hash_32(val, IN6_ADDR_HSIZE_SHIFT); 61 } 62 63 /* 64 * socket join an anycast group 65 */ 66 67 int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) 68 { 69 struct ipv6_pinfo *np = inet6_sk(sk); 70 struct net_device *dev = NULL; 71 struct inet6_dev *idev; 72 struct ipv6_ac_socklist *pac; 73 struct net *net = sock_net(sk); 74 int ishost = !net->ipv6.devconf_all->forwarding; 75 int err = 0; 76 77 ASSERT_RTNL(); 78 79 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 80 return -EPERM; 81 if (ipv6_addr_is_multicast(addr)) 82 return -EINVAL; 83 84 if (ifindex) 85 dev = __dev_get_by_index(net, ifindex); 86 87 if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) 88 return -EINVAL; 89 90 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); 91 if (!pac) 92 return -ENOMEM; 93 pac->acl_next = NULL; 94 pac->acl_addr = *addr; 95 96 if (ifindex == 0) { 97 struct rt6_info *rt; 98 99 rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); 100 if (rt) { 101 dev = rt->dst.dev; 102 ip6_rt_put(rt); 103 } else if (ishost) { 104 err = -EADDRNOTAVAIL; 105 goto error; 106 } else { 107 /* router, no matching interface: just pick one */ 108 dev = __dev_get_by_flags(net, IFF_UP, 109 IFF_UP | IFF_LOOPBACK); 110 } 111 } 112 113 if (!dev) { 114 err = -ENODEV; 115 goto error; 116 } 117 118 idev = __in6_dev_get(dev); 119 if (!idev) { 120 if (ifindex) 121 err = -ENODEV; 122 else 123 err = -EADDRNOTAVAIL; 124 goto error; 125 } 126 /* reset ishost, now that we have a specific device */ 127 ishost = !idev->cnf.forwarding; 128 129 pac->acl_ifindex = dev->ifindex; 130 131 /* XXX 132 * For hosts, allow link-local or matching prefix anycasts. 133 * This obviates the need for propagating anycast routes while 134 * still allowing some non-router anycast participation. 135 */ 136 if (!ipv6_chk_prefix(addr, dev)) { 137 if (ishost) 138 err = -EADDRNOTAVAIL; 139 if (err) 140 goto error; 141 } 142 143 err = __ipv6_dev_ac_inc(idev, addr); 144 if (!err) { 145 pac->acl_next = np->ipv6_ac_list; 146 np->ipv6_ac_list = pac; 147 pac = NULL; 148 } 149 150 error: 151 if (pac) 152 sock_kfree_s(sk, pac, sizeof(*pac)); 153 return err; 154 } 155 156 /* 157 * socket leave an anycast group 158 */ 159 int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) 160 { 161 struct ipv6_pinfo *np = inet6_sk(sk); 162 struct net_device *dev; 163 struct ipv6_ac_socklist *pac, *prev_pac; 164 struct net *net = sock_net(sk); 165 166 ASSERT_RTNL(); 167 168 prev_pac = NULL; 169 for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { 170 if ((ifindex == 0 || pac->acl_ifindex == ifindex) && 171 ipv6_addr_equal(&pac->acl_addr, addr)) 172 break; 173 prev_pac = pac; 174 } 175 if (!pac) 176 return -ENOENT; 177 if (prev_pac) 178 prev_pac->acl_next = pac->acl_next; 179 else 180 np->ipv6_ac_list = pac->acl_next; 181 182 dev = __dev_get_by_index(net, pac->acl_ifindex); 183 if (dev) 184 ipv6_dev_ac_dec(dev, &pac->acl_addr); 185 186 sock_kfree_s(sk, pac, sizeof(*pac)); 187 return 0; 188 } 189 190 void ipv6_sock_ac_close(struct sock *sk) 191 { 192 struct ipv6_pinfo *np = inet6_sk(sk); 193 struct net_device *dev = NULL; 194 struct ipv6_ac_socklist *pac; 195 struct net *net = sock_net(sk); 196 int prev_index; 197 198 if (!np->ipv6_ac_list) 199 return; 200 201 rtnl_lock(); 202 pac = np->ipv6_ac_list; 203 np->ipv6_ac_list = NULL; 204 205 prev_index = 0; 206 while (pac) { 207 struct ipv6_ac_socklist *next = pac->acl_next; 208 209 if (pac->acl_ifindex != prev_index) { 210 dev = __dev_get_by_index(net, pac->acl_ifindex); 211 prev_index = pac->acl_ifindex; 212 } 213 if (dev) 214 ipv6_dev_ac_dec(dev, &pac->acl_addr); 215 sock_kfree_s(sk, pac, sizeof(*pac)); 216 pac = next; 217 } 218 rtnl_unlock(); 219 } 220 221 static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) 222 { 223 unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); 224 225 spin_lock(&acaddr_hash_lock); 226 hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); 227 spin_unlock(&acaddr_hash_lock); 228 } 229 230 static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) 231 { 232 spin_lock(&acaddr_hash_lock); 233 hlist_del_init_rcu(&aca->aca_addr_lst); 234 spin_unlock(&acaddr_hash_lock); 235 } 236 237 static void aca_get(struct ifacaddr6 *aca) 238 { 239 refcount_inc(&aca->aca_refcnt); 240 } 241 242 static void aca_free_rcu(struct rcu_head *h) 243 { 244 struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); 245 246 fib6_info_release(aca->aca_rt); 247 kfree(aca); 248 } 249 250 static void aca_put(struct ifacaddr6 *ac) 251 { 252 if (refcount_dec_and_test(&ac->aca_refcnt)) { 253 call_rcu(&ac->rcu, aca_free_rcu); 254 } 255 } 256 257 static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, 258 const struct in6_addr *addr) 259 { 260 struct ifacaddr6 *aca; 261 262 aca = kzalloc(sizeof(*aca), GFP_ATOMIC); 263 if (!aca) 264 return NULL; 265 266 aca->aca_addr = *addr; 267 fib6_info_hold(f6i); 268 aca->aca_rt = f6i; 269 INIT_HLIST_NODE(&aca->aca_addr_lst); 270 aca->aca_users = 1; 271 /* aca_tstamp should be updated upon changes */ 272 aca->aca_cstamp = aca->aca_tstamp = jiffies; 273 refcount_set(&aca->aca_refcnt, 1); 274 275 return aca; 276 } 277 278 /* 279 * device anycast group inc (add if not found) 280 */ 281 int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) 282 { 283 struct ifacaddr6 *aca; 284 struct fib6_info *f6i; 285 struct net *net; 286 int err; 287 288 ASSERT_RTNL(); 289 290 write_lock_bh(&idev->lock); 291 if (idev->dead) { 292 err = -ENODEV; 293 goto out; 294 } 295 296 for (aca = idev->ac_list; aca; aca = aca->aca_next) { 297 if (ipv6_addr_equal(&aca->aca_addr, addr)) { 298 aca->aca_users++; 299 err = 0; 300 goto out; 301 } 302 } 303 304 net = dev_net(idev->dev); 305 f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC); 306 if (IS_ERR(f6i)) { 307 err = PTR_ERR(f6i); 308 goto out; 309 } 310 aca = aca_alloc(f6i, addr); 311 if (!aca) { 312 fib6_info_release(f6i); 313 err = -ENOMEM; 314 goto out; 315 } 316 317 aca->aca_next = idev->ac_list; 318 idev->ac_list = aca; 319 320 /* Hold this for addrconf_join_solict() below before we unlock, 321 * it is already exposed via idev->ac_list. 322 */ 323 aca_get(aca); 324 write_unlock_bh(&idev->lock); 325 326 ipv6_add_acaddr_hash(net, aca); 327 328 ip6_ins_rt(net, f6i); 329 330 addrconf_join_solict(idev->dev, &aca->aca_addr); 331 332 aca_put(aca); 333 return 0; 334 out: 335 write_unlock_bh(&idev->lock); 336 return err; 337 } 338 339 /* 340 * device anycast group decrement 341 */ 342 int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) 343 { 344 struct ifacaddr6 *aca, *prev_aca; 345 346 ASSERT_RTNL(); 347 348 write_lock_bh(&idev->lock); 349 prev_aca = NULL; 350 for (aca = idev->ac_list; aca; aca = aca->aca_next) { 351 if (ipv6_addr_equal(&aca->aca_addr, addr)) 352 break; 353 prev_aca = aca; 354 } 355 if (!aca) { 356 write_unlock_bh(&idev->lock); 357 return -ENOENT; 358 } 359 if (--aca->aca_users > 0) { 360 write_unlock_bh(&idev->lock); 361 return 0; 362 } 363 if (prev_aca) 364 prev_aca->aca_next = aca->aca_next; 365 else 366 idev->ac_list = aca->aca_next; 367 write_unlock_bh(&idev->lock); 368 ipv6_del_acaddr_hash(aca); 369 addrconf_leave_solict(idev, &aca->aca_addr); 370 371 ip6_del_rt(dev_net(idev->dev), aca->aca_rt); 372 373 aca_put(aca); 374 return 0; 375 } 376 377 /* called with rtnl_lock() */ 378 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) 379 { 380 struct inet6_dev *idev = __in6_dev_get(dev); 381 382 if (!idev) 383 return -ENODEV; 384 return __ipv6_dev_ac_dec(idev, addr); 385 } 386 387 void ipv6_ac_destroy_dev(struct inet6_dev *idev) 388 { 389 struct ifacaddr6 *aca; 390 391 write_lock_bh(&idev->lock); 392 while ((aca = idev->ac_list) != NULL) { 393 idev->ac_list = aca->aca_next; 394 write_unlock_bh(&idev->lock); 395 396 ipv6_del_acaddr_hash(aca); 397 398 addrconf_leave_solict(idev, &aca->aca_addr); 399 400 ip6_del_rt(dev_net(idev->dev), aca->aca_rt); 401 402 aca_put(aca); 403 404 write_lock_bh(&idev->lock); 405 } 406 write_unlock_bh(&idev->lock); 407 } 408 409 /* 410 * check if the interface has this anycast address 411 * called with rcu_read_lock() 412 */ 413 static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) 414 { 415 struct inet6_dev *idev; 416 struct ifacaddr6 *aca; 417 418 idev = __in6_dev_get(dev); 419 if (idev) { 420 read_lock_bh(&idev->lock); 421 for (aca = idev->ac_list; aca; aca = aca->aca_next) 422 if (ipv6_addr_equal(&aca->aca_addr, addr)) 423 break; 424 read_unlock_bh(&idev->lock); 425 return aca != NULL; 426 } 427 return false; 428 } 429 430 /* 431 * check if given interface (or any, if dev==0) has this anycast address 432 */ 433 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, 434 const struct in6_addr *addr) 435 { 436 unsigned int hash = inet6_acaddr_hash(net, addr); 437 struct net_device *nh_dev; 438 struct ifacaddr6 *aca; 439 bool found = false; 440 441 rcu_read_lock(); 442 if (dev) 443 found = ipv6_chk_acast_dev(dev, addr); 444 else 445 hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], 446 aca_addr_lst) { 447 nh_dev = fib6_info_nh_dev(aca->aca_rt); 448 if (!nh_dev || !net_eq(dev_net(nh_dev), net)) 449 continue; 450 if (ipv6_addr_equal(&aca->aca_addr, addr)) { 451 found = true; 452 break; 453 } 454 } 455 rcu_read_unlock(); 456 return found; 457 } 458 459 /* check if this anycast address is link-local on given interface or 460 * is global 461 */ 462 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, 463 const struct in6_addr *addr) 464 { 465 return ipv6_chk_acast_addr(net, 466 (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? 467 dev : NULL), 468 addr); 469 } 470 471 #ifdef CONFIG_PROC_FS 472 struct ac6_iter_state { 473 struct seq_net_private p; 474 struct net_device *dev; 475 struct inet6_dev *idev; 476 }; 477 478 #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) 479 480 static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) 481 { 482 struct ifacaddr6 *im = NULL; 483 struct ac6_iter_state *state = ac6_seq_private(seq); 484 struct net *net = seq_file_net(seq); 485 486 state->idev = NULL; 487 for_each_netdev_rcu(net, state->dev) { 488 struct inet6_dev *idev; 489 idev = __in6_dev_get(state->dev); 490 if (!idev) 491 continue; 492 read_lock_bh(&idev->lock); 493 im = idev->ac_list; 494 if (im) { 495 state->idev = idev; 496 break; 497 } 498 read_unlock_bh(&idev->lock); 499 } 500 return im; 501 } 502 503 static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) 504 { 505 struct ac6_iter_state *state = ac6_seq_private(seq); 506 507 im = im->aca_next; 508 while (!im) { 509 if (likely(state->idev != NULL)) 510 read_unlock_bh(&state->idev->lock); 511 512 state->dev = next_net_device_rcu(state->dev); 513 if (!state->dev) { 514 state->idev = NULL; 515 break; 516 } 517 state->idev = __in6_dev_get(state->dev); 518 if (!state->idev) 519 continue; 520 read_lock_bh(&state->idev->lock); 521 im = state->idev->ac_list; 522 } 523 return im; 524 } 525 526 static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) 527 { 528 struct ifacaddr6 *im = ac6_get_first(seq); 529 if (im) 530 while (pos && (im = ac6_get_next(seq, im)) != NULL) 531 --pos; 532 return pos ? NULL : im; 533 } 534 535 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) 536 __acquires(RCU) 537 { 538 rcu_read_lock(); 539 return ac6_get_idx(seq, *pos); 540 } 541 542 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) 543 { 544 struct ifacaddr6 *im = ac6_get_next(seq, v); 545 546 ++*pos; 547 return im; 548 } 549 550 static void ac6_seq_stop(struct seq_file *seq, void *v) 551 __releases(RCU) 552 { 553 struct ac6_iter_state *state = ac6_seq_private(seq); 554 555 if (likely(state->idev != NULL)) { 556 read_unlock_bh(&state->idev->lock); 557 state->idev = NULL; 558 } 559 rcu_read_unlock(); 560 } 561 562 static int ac6_seq_show(struct seq_file *seq, void *v) 563 { 564 struct ifacaddr6 *im = (struct ifacaddr6 *)v; 565 struct ac6_iter_state *state = ac6_seq_private(seq); 566 567 seq_printf(seq, "%-4d %-15s %pi6 %5d\n", 568 state->dev->ifindex, state->dev->name, 569 &im->aca_addr, im->aca_users); 570 return 0; 571 } 572 573 static const struct seq_operations ac6_seq_ops = { 574 .start = ac6_seq_start, 575 .next = ac6_seq_next, 576 .stop = ac6_seq_stop, 577 .show = ac6_seq_show, 578 }; 579 580 int __net_init ac6_proc_init(struct net *net) 581 { 582 if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops, 583 sizeof(struct ac6_iter_state))) 584 return -ENOMEM; 585 586 return 0; 587 } 588 589 void ac6_proc_exit(struct net *net) 590 { 591 remove_proc_entry("anycast6", net->proc_net); 592 } 593 #endif 594 595 /* Init / cleanup code 596 */ 597 int __init ipv6_anycast_init(void) 598 { 599 int i; 600 601 for (i = 0; i < IN6_ADDR_HSIZE; i++) 602 INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); 603 return 0; 604 } 605 606 void ipv6_anycast_cleanup(void) 607 { 608 int i; 609 610 spin_lock(&acaddr_hash_lock); 611 for (i = 0; i < IN6_ADDR_HSIZE; i++) 612 WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); 613 spin_unlock(&acaddr_hash_lock); 614 } 615