1 /* 2 * Anycast support for IPv6 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * David L Stevens (dlstevens@us.ibm.com) 7 * 8 * based heavily on net/ipv6/mcast.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 #include <linux/capability.h> 17 #include <linux/module.h> 18 #include <linux/errno.h> 19 #include <linux/types.h> 20 #include <linux/random.h> 21 #include <linux/string.h> 22 #include <linux/socket.h> 23 #include <linux/sockios.h> 24 #include <linux/net.h> 25 #include <linux/in6.h> 26 #include <linux/netdevice.h> 27 #include <linux/if_arp.h> 28 #include <linux/route.h> 29 #include <linux/init.h> 30 #include <linux/proc_fs.h> 31 #include <linux/seq_file.h> 32 33 #include <net/sock.h> 34 #include <net/snmp.h> 35 36 #include <net/ipv6.h> 37 #include <net/protocol.h> 38 #include <net/if_inet6.h> 39 #include <net/ndisc.h> 40 #include <net/addrconf.h> 41 #include <net/ip6_route.h> 42 43 #include <net/checksum.h> 44 45 static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr); 46 47 /* Big ac list lock for all the sockets */ 48 static DEFINE_RWLOCK(ipv6_sk_ac_lock); 49 50 static int 51 ip6_onlink(struct in6_addr *addr, struct net_device *dev) 52 { 53 struct inet6_dev *idev; 54 struct inet6_ifaddr *ifa; 55 int onlink; 56 57 onlink = 0; 58 rcu_read_lock(); 59 idev = __in6_dev_get(dev); 60 if (idev) { 61 read_lock_bh(&idev->lock); 62 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { 63 onlink = ipv6_prefix_equal(addr, &ifa->addr, 64 ifa->prefix_len); 65 if (onlink) 66 break; 67 } 68 read_unlock_bh(&idev->lock); 69 } 70 rcu_read_unlock(); 71 return onlink; 72 } 73 74 /* 75 * socket join an anycast group 76 */ 77 78 int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) 79 { 80 struct ipv6_pinfo *np = inet6_sk(sk); 81 struct net_device *dev = NULL; 82 struct inet6_dev *idev; 83 struct ipv6_ac_socklist *pac; 84 int ishost = !ipv6_devconf.forwarding; 85 int err = 0; 86 87 if (!capable(CAP_NET_ADMIN)) 88 return -EPERM; 89 if (ipv6_addr_is_multicast(addr)) 90 return -EINVAL; 91 if (ipv6_chk_addr(addr, NULL, 0)) 92 return -EINVAL; 93 94 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); 95 if (pac == NULL) 96 return -ENOMEM; 97 pac->acl_next = NULL; 98 ipv6_addr_copy(&pac->acl_addr, addr); 99 100 if (ifindex == 0) { 101 struct rt6_info *rt; 102 103 rt = rt6_lookup(addr, NULL, 0, 0); 104 if (rt) { 105 dev = rt->rt6i_dev; 106 dev_hold(dev); 107 dst_release(&rt->u.dst); 108 } else if (ishost) { 109 err = -EADDRNOTAVAIL; 110 goto out_free_pac; 111 } else { 112 /* router, no matching interface: just pick one */ 113 114 dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); 115 } 116 } else 117 dev = dev_get_by_index(ifindex); 118 119 if (dev == NULL) { 120 err = -ENODEV; 121 goto out_free_pac; 122 } 123 124 idev = in6_dev_get(dev); 125 if (!idev) { 126 if (ifindex) 127 err = -ENODEV; 128 else 129 err = -EADDRNOTAVAIL; 130 goto out_dev_put; 131 } 132 /* reset ishost, now that we have a specific device */ 133 ishost = !idev->cnf.forwarding; 134 in6_dev_put(idev); 135 136 pac->acl_ifindex = dev->ifindex; 137 138 /* XXX 139 * For hosts, allow link-local or matching prefix anycasts. 140 * This obviates the need for propagating anycast routes while 141 * still allowing some non-router anycast participation. 142 */ 143 if (!ip6_onlink(addr, dev)) { 144 if (ishost) 145 err = -EADDRNOTAVAIL; 146 if (err) 147 goto out_dev_put; 148 } 149 150 err = ipv6_dev_ac_inc(dev, addr); 151 if (err) 152 goto out_dev_put; 153 154 write_lock_bh(&ipv6_sk_ac_lock); 155 pac->acl_next = np->ipv6_ac_list; 156 np->ipv6_ac_list = pac; 157 write_unlock_bh(&ipv6_sk_ac_lock); 158 159 dev_put(dev); 160 161 return 0; 162 163 out_dev_put: 164 dev_put(dev); 165 out_free_pac: 166 sock_kfree_s(sk, pac, sizeof(*pac)); 167 return err; 168 } 169 170 /* 171 * socket leave an anycast group 172 */ 173 int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) 174 { 175 struct ipv6_pinfo *np = inet6_sk(sk); 176 struct net_device *dev; 177 struct ipv6_ac_socklist *pac, *prev_pac; 178 179 write_lock_bh(&ipv6_sk_ac_lock); 180 prev_pac = NULL; 181 for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { 182 if ((ifindex == 0 || pac->acl_ifindex == ifindex) && 183 ipv6_addr_equal(&pac->acl_addr, addr)) 184 break; 185 prev_pac = pac; 186 } 187 if (!pac) { 188 write_unlock_bh(&ipv6_sk_ac_lock); 189 return -ENOENT; 190 } 191 if (prev_pac) 192 prev_pac->acl_next = pac->acl_next; 193 else 194 np->ipv6_ac_list = pac->acl_next; 195 196 write_unlock_bh(&ipv6_sk_ac_lock); 197 198 dev = dev_get_by_index(pac->acl_ifindex); 199 if (dev) { 200 ipv6_dev_ac_dec(dev, &pac->acl_addr); 201 dev_put(dev); 202 } 203 sock_kfree_s(sk, pac, sizeof(*pac)); 204 return 0; 205 } 206 207 void ipv6_sock_ac_close(struct sock *sk) 208 { 209 struct ipv6_pinfo *np = inet6_sk(sk); 210 struct net_device *dev = NULL; 211 struct ipv6_ac_socklist *pac; 212 int prev_index; 213 214 write_lock_bh(&ipv6_sk_ac_lock); 215 pac = np->ipv6_ac_list; 216 np->ipv6_ac_list = NULL; 217 write_unlock_bh(&ipv6_sk_ac_lock); 218 219 prev_index = 0; 220 while (pac) { 221 struct ipv6_ac_socklist *next = pac->acl_next; 222 223 if (pac->acl_ifindex != prev_index) { 224 if (dev) 225 dev_put(dev); 226 dev = dev_get_by_index(pac->acl_ifindex); 227 prev_index = pac->acl_ifindex; 228 } 229 if (dev) 230 ipv6_dev_ac_dec(dev, &pac->acl_addr); 231 sock_kfree_s(sk, pac, sizeof(*pac)); 232 pac = next; 233 } 234 if (dev) 235 dev_put(dev); 236 } 237 238 #if 0 239 /* The function is not used, which is funny. Apparently, author 240 * supposed to use it to filter out datagrams inside udp/raw but forgot. 241 * 242 * It is OK, anycasts are not special comparing to delivery to unicasts. 243 */ 244 245 int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex) 246 { 247 struct ipv6_ac_socklist *pac; 248 struct ipv6_pinfo *np = inet6_sk(sk); 249 int found; 250 251 found = 0; 252 read_lock(&ipv6_sk_ac_lock); 253 for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) { 254 if (ifindex && pac->acl_ifindex != ifindex) 255 continue; 256 found = ipv6_addr_equal(&pac->acl_addr, addr); 257 if (found) 258 break; 259 } 260 read_unlock(&ipv6_sk_ac_lock); 261 262 return found; 263 } 264 265 #endif 266 267 static void aca_put(struct ifacaddr6 *ac) 268 { 269 if (atomic_dec_and_test(&ac->aca_refcnt)) { 270 in6_dev_put(ac->aca_idev); 271 dst_release(&ac->aca_rt->u.dst); 272 kfree(ac); 273 } 274 } 275 276 /* 277 * device anycast group inc (add if not found) 278 */ 279 int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) 280 { 281 struct ifacaddr6 *aca; 282 struct inet6_dev *idev; 283 struct rt6_info *rt; 284 int err; 285 286 idev = in6_dev_get(dev); 287 288 if (idev == NULL) 289 return -EINVAL; 290 291 write_lock_bh(&idev->lock); 292 if (idev->dead) { 293 err = -ENODEV; 294 goto out; 295 } 296 297 for (aca = idev->ac_list; aca; aca = aca->aca_next) { 298 if (ipv6_addr_equal(&aca->aca_addr, addr)) { 299 aca->aca_users++; 300 err = 0; 301 goto out; 302 } 303 } 304 305 /* 306 * not found: create a new one. 307 */ 308 309 aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); 310 311 if (aca == NULL) { 312 err = -ENOMEM; 313 goto out; 314 } 315 316 rt = addrconf_dst_alloc(idev, addr, 1); 317 if (IS_ERR(rt)) { 318 kfree(aca); 319 err = PTR_ERR(rt); 320 goto out; 321 } 322 323 ipv6_addr_copy(&aca->aca_addr, addr); 324 aca->aca_idev = idev; 325 aca->aca_rt = rt; 326 aca->aca_users = 1; 327 /* aca_tstamp should be updated upon changes */ 328 aca->aca_cstamp = aca->aca_tstamp = jiffies; 329 atomic_set(&aca->aca_refcnt, 2); 330 spin_lock_init(&aca->aca_lock); 331 332 aca->aca_next = idev->ac_list; 333 idev->ac_list = aca; 334 write_unlock_bh(&idev->lock); 335 336 dst_hold(&rt->u.dst); 337 if (ip6_ins_rt(rt)) 338 dst_release(&rt->u.dst); 339 340 addrconf_join_solict(dev, &aca->aca_addr); 341 342 aca_put(aca); 343 return 0; 344 out: 345 write_unlock_bh(&idev->lock); 346 in6_dev_put(idev); 347 return err; 348 } 349 350 /* 351 * device anycast group decrement 352 */ 353 int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) 354 { 355 struct ifacaddr6 *aca, *prev_aca; 356 357 write_lock_bh(&idev->lock); 358 prev_aca = NULL; 359 for (aca = idev->ac_list; aca; aca = aca->aca_next) { 360 if (ipv6_addr_equal(&aca->aca_addr, addr)) 361 break; 362 prev_aca = aca; 363 } 364 if (!aca) { 365 write_unlock_bh(&idev->lock); 366 return -ENOENT; 367 } 368 if (--aca->aca_users > 0) { 369 write_unlock_bh(&idev->lock); 370 return 0; 371 } 372 if (prev_aca) 373 prev_aca->aca_next = aca->aca_next; 374 else 375 idev->ac_list = aca->aca_next; 376 write_unlock_bh(&idev->lock); 377 addrconf_leave_solict(idev, &aca->aca_addr); 378 379 dst_hold(&aca->aca_rt->u.dst); 380 if (ip6_del_rt(aca->aca_rt)) 381 dst_free(&aca->aca_rt->u.dst); 382 else 383 dst_release(&aca->aca_rt->u.dst); 384 385 aca_put(aca); 386 return 0; 387 } 388 389 static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) 390 { 391 int ret; 392 struct inet6_dev *idev = in6_dev_get(dev); 393 if (idev == NULL) 394 return -ENODEV; 395 ret = __ipv6_dev_ac_dec(idev, addr); 396 in6_dev_put(idev); 397 return ret; 398 } 399 400 /* 401 * check if the interface has this anycast address 402 */ 403 static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) 404 { 405 struct inet6_dev *idev; 406 struct ifacaddr6 *aca; 407 408 idev = in6_dev_get(dev); 409 if (idev) { 410 read_lock_bh(&idev->lock); 411 for (aca = idev->ac_list; aca; aca = aca->aca_next) 412 if (ipv6_addr_equal(&aca->aca_addr, addr)) 413 break; 414 read_unlock_bh(&idev->lock); 415 in6_dev_put(idev); 416 return aca != 0; 417 } 418 return 0; 419 } 420 421 /* 422 * check if given interface (or any, if dev==0) has this anycast address 423 */ 424 int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) 425 { 426 int found = 0; 427 428 if (dev) 429 return ipv6_chk_acast_dev(dev, addr); 430 read_lock(&dev_base_lock); 431 for_each_netdev(dev) 432 if (ipv6_chk_acast_dev(dev, addr)) { 433 found = 1; 434 break; 435 } 436 read_unlock(&dev_base_lock); 437 return found; 438 } 439 440 441 #ifdef CONFIG_PROC_FS 442 struct ac6_iter_state { 443 struct net_device *dev; 444 struct inet6_dev *idev; 445 }; 446 447 #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) 448 449 static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) 450 { 451 struct ifacaddr6 *im = NULL; 452 struct ac6_iter_state *state = ac6_seq_private(seq); 453 454 state->idev = NULL; 455 for_each_netdev(state->dev) { 456 struct inet6_dev *idev; 457 idev = in6_dev_get(state->dev); 458 if (!idev) 459 continue; 460 read_lock_bh(&idev->lock); 461 im = idev->ac_list; 462 if (im) { 463 state->idev = idev; 464 break; 465 } 466 read_unlock_bh(&idev->lock); 467 in6_dev_put(idev); 468 } 469 return im; 470 } 471 472 static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) 473 { 474 struct ac6_iter_state *state = ac6_seq_private(seq); 475 476 im = im->aca_next; 477 while (!im) { 478 if (likely(state->idev != NULL)) { 479 read_unlock_bh(&state->idev->lock); 480 in6_dev_put(state->idev); 481 } 482 state->dev = next_net_device(state->dev); 483 if (!state->dev) { 484 state->idev = NULL; 485 break; 486 } 487 state->idev = in6_dev_get(state->dev); 488 if (!state->idev) 489 continue; 490 read_lock_bh(&state->idev->lock); 491 im = state->idev->ac_list; 492 } 493 return im; 494 } 495 496 static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) 497 { 498 struct ifacaddr6 *im = ac6_get_first(seq); 499 if (im) 500 while (pos && (im = ac6_get_next(seq, im)) != NULL) 501 --pos; 502 return pos ? NULL : im; 503 } 504 505 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) 506 { 507 read_lock(&dev_base_lock); 508 return ac6_get_idx(seq, *pos); 509 } 510 511 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) 512 { 513 struct ifacaddr6 *im; 514 im = ac6_get_next(seq, v); 515 ++*pos; 516 return im; 517 } 518 519 static void ac6_seq_stop(struct seq_file *seq, void *v) 520 { 521 struct ac6_iter_state *state = ac6_seq_private(seq); 522 if (likely(state->idev != NULL)) { 523 read_unlock_bh(&state->idev->lock); 524 in6_dev_put(state->idev); 525 } 526 read_unlock(&dev_base_lock); 527 } 528 529 static int ac6_seq_show(struct seq_file *seq, void *v) 530 { 531 struct ifacaddr6 *im = (struct ifacaddr6 *)v; 532 struct ac6_iter_state *state = ac6_seq_private(seq); 533 534 seq_printf(seq, 535 "%-4d %-15s " NIP6_SEQFMT " %5d\n", 536 state->dev->ifindex, state->dev->name, 537 NIP6(im->aca_addr), 538 im->aca_users); 539 return 0; 540 } 541 542 static struct seq_operations ac6_seq_ops = { 543 .start = ac6_seq_start, 544 .next = ac6_seq_next, 545 .stop = ac6_seq_stop, 546 .show = ac6_seq_show, 547 }; 548 549 static int ac6_seq_open(struct inode *inode, struct file *file) 550 { 551 struct seq_file *seq; 552 int rc = -ENOMEM; 553 struct ac6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); 554 555 if (!s) 556 goto out; 557 558 rc = seq_open(file, &ac6_seq_ops); 559 if (rc) 560 goto out_kfree; 561 562 seq = file->private_data; 563 seq->private = s; 564 out: 565 return rc; 566 out_kfree: 567 kfree(s); 568 goto out; 569 } 570 571 static const struct file_operations ac6_seq_fops = { 572 .owner = THIS_MODULE, 573 .open = ac6_seq_open, 574 .read = seq_read, 575 .llseek = seq_lseek, 576 .release = seq_release_private, 577 }; 578 579 int __init ac6_proc_init(void) 580 { 581 if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) 582 return -ENOMEM; 583 584 return 0; 585 } 586 587 void ac6_proc_exit(void) 588 { 589 proc_net_remove("anycast6"); 590 } 591 #endif 592 593