1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 22 #if 0 23 #define ADDRLABEL(x...) printk(x) 24 #else 25 #define ADDRLABEL(x...) do { ; } while(0) 26 #endif 27 28 /* 29 * Policy Table 30 */ 31 struct ip6addrlbl_entry 32 { 33 #ifdef CONFIG_NET_NS 34 struct net *lbl_net; 35 #endif 36 struct in6_addr prefix; 37 int prefixlen; 38 int ifindex; 39 int addrtype; 40 u32 label; 41 struct hlist_node list; 42 atomic_t refcnt; 43 struct rcu_head rcu; 44 }; 45 46 static struct ip6addrlbl_table 47 { 48 struct hlist_head head; 49 spinlock_t lock; 50 u32 seq; 51 } ip6addrlbl_table; 52 53 static inline 54 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 55 { 56 return read_pnet(&lbl->lbl_net); 57 } 58 59 /* 60 * Default policy table (RFC6724 + extensions) 61 * 62 * prefix addr_type label 63 * ------------------------------------------------------------------------- 64 * ::1/128 LOOPBACK 0 65 * ::/0 N/A 1 66 * 2002::/16 N/A 2 67 * ::/96 COMPATv4 3 68 * ::ffff:0:0/96 V4MAPPED 4 69 * fc00::/7 N/A 5 ULA (RFC 4193) 70 * 2001::/32 N/A 6 Teredo (RFC 4380) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 72 * fec0::/10 N/A 11 Site-local 73 * (deprecated by RFC3879) 74 * 3ffe::/16 N/A 12 6bone 75 * 76 * Note: 0xffffffff is used if we do not have any policies. 77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 78 */ 79 80 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 81 82 static const __net_initconst struct ip6addrlbl_init_table 83 { 84 const struct in6_addr *prefix; 85 int prefixlen; 86 u32 label; 87 } ip6addrlbl_init_table[] = { 88 { /* ::/0 */ 89 .prefix = &in6addr_any, 90 .label = 1, 91 },{ /* fc00::/7 */ 92 .prefix = &(struct in6_addr){{{ 0xfc }}}, 93 .prefixlen = 7, 94 .label = 5, 95 },{ /* fec0::/10 */ 96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, 97 .prefixlen = 10, 98 .label = 11, 99 },{ /* 2002::/16 */ 100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 101 .prefixlen = 16, 102 .label = 2, 103 },{ /* 3ffe::/16 */ 104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, 105 .prefixlen = 16, 106 .label = 12, 107 },{ /* 2001::/32 */ 108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 109 .prefixlen = 32, 110 .label = 6, 111 },{ /* 2001:10::/28 */ 112 .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, 113 .prefixlen = 28, 114 .label = 7, 115 },{ /* ::ffff:0:0 */ 116 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, 117 .prefixlen = 96, 118 .label = 4, 119 },{ /* ::/96 */ 120 .prefix = &in6addr_any, 121 .prefixlen = 96, 122 .label = 3, 123 },{ /* ::1/128 */ 124 .prefix = &in6addr_loopback, 125 .prefixlen = 128, 126 .label = 0, 127 } 128 }; 129 130 /* Object management */ 131 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 132 { 133 #ifdef CONFIG_NET_NS 134 release_net(p->lbl_net); 135 #endif 136 kfree(p); 137 } 138 139 static void ip6addrlbl_free_rcu(struct rcu_head *h) 140 { 141 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 142 } 143 144 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 145 { 146 return atomic_inc_not_zero(&p->refcnt); 147 } 148 149 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 150 { 151 if (atomic_dec_and_test(&p->refcnt)) 152 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 153 } 154 155 /* Find label */ 156 static bool __ip6addrlbl_match(struct net *net, 157 const struct ip6addrlbl_entry *p, 158 const struct in6_addr *addr, 159 int addrtype, int ifindex) 160 { 161 if (!net_eq(ip6addrlbl_net(p), net)) 162 return false; 163 if (p->ifindex && p->ifindex != ifindex) 164 return false; 165 if (p->addrtype && p->addrtype != addrtype) 166 return false; 167 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 168 return false; 169 return true; 170 } 171 172 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 173 const struct in6_addr *addr, 174 int type, int ifindex) 175 { 176 struct hlist_node *pos; 177 struct ip6addrlbl_entry *p; 178 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 179 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 180 return p; 181 } 182 return NULL; 183 } 184 185 u32 ipv6_addr_label(struct net *net, 186 const struct in6_addr *addr, int type, int ifindex) 187 { 188 u32 label; 189 struct ip6addrlbl_entry *p; 190 191 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 192 193 rcu_read_lock(); 194 p = __ipv6_addr_label(net, addr, type, ifindex); 195 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 196 rcu_read_unlock(); 197 198 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 199 __func__, addr, type, ifindex, label); 200 201 return label; 202 } 203 204 /* allocate one entry */ 205 static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 206 const struct in6_addr *prefix, 207 int prefixlen, int ifindex, 208 u32 label) 209 { 210 struct ip6addrlbl_entry *newp; 211 int addrtype; 212 213 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 214 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 215 216 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 217 218 switch (addrtype) { 219 case IPV6_ADDR_MAPPED: 220 if (prefixlen > 96) 221 return ERR_PTR(-EINVAL); 222 if (prefixlen < 96) 223 addrtype = 0; 224 break; 225 case IPV6_ADDR_COMPATv4: 226 if (prefixlen != 96) 227 addrtype = 0; 228 break; 229 case IPV6_ADDR_LOOPBACK: 230 if (prefixlen != 128) 231 addrtype = 0; 232 break; 233 } 234 235 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 236 if (!newp) 237 return ERR_PTR(-ENOMEM); 238 239 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 240 newp->prefixlen = prefixlen; 241 newp->ifindex = ifindex; 242 newp->addrtype = addrtype; 243 newp->label = label; 244 INIT_HLIST_NODE(&newp->list); 245 #ifdef CONFIG_NET_NS 246 newp->lbl_net = hold_net(net); 247 #endif 248 atomic_set(&newp->refcnt, 1); 249 return newp; 250 } 251 252 /* add a label */ 253 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 254 { 255 int ret = 0; 256 257 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 258 __func__, 259 newp, replace); 260 261 if (hlist_empty(&ip6addrlbl_table.head)) { 262 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 263 } else { 264 struct hlist_node *pos, *n; 265 struct ip6addrlbl_entry *p = NULL; 266 hlist_for_each_entry_safe(p, pos, n, 267 &ip6addrlbl_table.head, list) { 268 if (p->prefixlen == newp->prefixlen && 269 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 270 p->ifindex == newp->ifindex && 271 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 272 if (!replace) { 273 ret = -EEXIST; 274 goto out; 275 } 276 hlist_replace_rcu(&p->list, &newp->list); 277 ip6addrlbl_put(p); 278 goto out; 279 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 280 (p->prefixlen < newp->prefixlen)) { 281 hlist_add_before_rcu(&newp->list, &p->list); 282 goto out; 283 } 284 } 285 hlist_add_after_rcu(&p->list, &newp->list); 286 } 287 out: 288 if (!ret) 289 ip6addrlbl_table.seq++; 290 return ret; 291 } 292 293 /* add a label */ 294 static int ip6addrlbl_add(struct net *net, 295 const struct in6_addr *prefix, int prefixlen, 296 int ifindex, u32 label, int replace) 297 { 298 struct ip6addrlbl_entry *newp; 299 int ret = 0; 300 301 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 302 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 303 replace); 304 305 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 306 if (IS_ERR(newp)) 307 return PTR_ERR(newp); 308 spin_lock(&ip6addrlbl_table.lock); 309 ret = __ip6addrlbl_add(newp, replace); 310 spin_unlock(&ip6addrlbl_table.lock); 311 if (ret) 312 ip6addrlbl_free(newp); 313 return ret; 314 } 315 316 /* remove a label */ 317 static int __ip6addrlbl_del(struct net *net, 318 const struct in6_addr *prefix, int prefixlen, 319 int ifindex) 320 { 321 struct ip6addrlbl_entry *p = NULL; 322 struct hlist_node *pos, *n; 323 int ret = -ESRCH; 324 325 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 326 __func__, prefix, prefixlen, ifindex); 327 328 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 329 if (p->prefixlen == prefixlen && 330 net_eq(ip6addrlbl_net(p), net) && 331 p->ifindex == ifindex && 332 ipv6_addr_equal(&p->prefix, prefix)) { 333 hlist_del_rcu(&p->list); 334 ip6addrlbl_put(p); 335 ret = 0; 336 break; 337 } 338 } 339 return ret; 340 } 341 342 static int ip6addrlbl_del(struct net *net, 343 const struct in6_addr *prefix, int prefixlen, 344 int ifindex) 345 { 346 struct in6_addr prefix_buf; 347 int ret; 348 349 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 350 __func__, prefix, prefixlen, ifindex); 351 352 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 353 spin_lock(&ip6addrlbl_table.lock); 354 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 355 spin_unlock(&ip6addrlbl_table.lock); 356 return ret; 357 } 358 359 /* add default label */ 360 static int __net_init ip6addrlbl_net_init(struct net *net) 361 { 362 int err = 0; 363 int i; 364 365 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 366 367 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 368 int ret = ip6addrlbl_add(net, 369 ip6addrlbl_init_table[i].prefix, 370 ip6addrlbl_init_table[i].prefixlen, 371 0, 372 ip6addrlbl_init_table[i].label, 0); 373 /* XXX: should we free all rules when we catch an error? */ 374 if (ret && (!err || err != -ENOMEM)) 375 err = ret; 376 } 377 return err; 378 } 379 380 static void __net_exit ip6addrlbl_net_exit(struct net *net) 381 { 382 struct ip6addrlbl_entry *p = NULL; 383 struct hlist_node *pos, *n; 384 385 /* Remove all labels belonging to the exiting net */ 386 spin_lock(&ip6addrlbl_table.lock); 387 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 388 if (net_eq(ip6addrlbl_net(p), net)) { 389 hlist_del_rcu(&p->list); 390 ip6addrlbl_put(p); 391 } 392 } 393 spin_unlock(&ip6addrlbl_table.lock); 394 } 395 396 static struct pernet_operations ipv6_addr_label_ops = { 397 .init = ip6addrlbl_net_init, 398 .exit = ip6addrlbl_net_exit, 399 }; 400 401 int __init ipv6_addr_label_init(void) 402 { 403 spin_lock_init(&ip6addrlbl_table.lock); 404 405 return register_pernet_subsys(&ipv6_addr_label_ops); 406 } 407 408 void ipv6_addr_label_cleanup(void) 409 { 410 unregister_pernet_subsys(&ipv6_addr_label_ops); 411 } 412 413 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 414 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 415 [IFAL_LABEL] = { .len = sizeof(u32), }, 416 }; 417 418 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 419 void *arg) 420 { 421 struct net *net = sock_net(skb->sk); 422 struct ifaddrlblmsg *ifal; 423 struct nlattr *tb[IFAL_MAX+1]; 424 struct in6_addr *pfx; 425 u32 label; 426 int err = 0; 427 428 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 429 if (err < 0) 430 return err; 431 432 ifal = nlmsg_data(nlh); 433 434 if (ifal->ifal_family != AF_INET6 || 435 ifal->ifal_prefixlen > 128) 436 return -EINVAL; 437 438 if (!tb[IFAL_ADDRESS]) 439 return -EINVAL; 440 441 pfx = nla_data(tb[IFAL_ADDRESS]); 442 if (!pfx) 443 return -EINVAL; 444 445 if (!tb[IFAL_LABEL]) 446 return -EINVAL; 447 label = nla_get_u32(tb[IFAL_LABEL]); 448 if (label == IPV6_ADDR_LABEL_DEFAULT) 449 return -EINVAL; 450 451 switch(nlh->nlmsg_type) { 452 case RTM_NEWADDRLABEL: 453 if (ifal->ifal_index && 454 !__dev_get_by_index(net, ifal->ifal_index)) 455 return -EINVAL; 456 457 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 458 ifal->ifal_index, label, 459 nlh->nlmsg_flags & NLM_F_REPLACE); 460 break; 461 case RTM_DELADDRLABEL: 462 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 463 ifal->ifal_index); 464 break; 465 default: 466 err = -EOPNOTSUPP; 467 } 468 return err; 469 } 470 471 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 472 int prefixlen, int ifindex, u32 lseq) 473 { 474 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 475 ifal->ifal_family = AF_INET6; 476 ifal->ifal_prefixlen = prefixlen; 477 ifal->ifal_flags = 0; 478 ifal->ifal_index = ifindex; 479 ifal->ifal_seq = lseq; 480 }; 481 482 static int ip6addrlbl_fill(struct sk_buff *skb, 483 struct ip6addrlbl_entry *p, 484 u32 lseq, 485 u32 portid, u32 seq, int event, 486 unsigned int flags) 487 { 488 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 489 sizeof(struct ifaddrlblmsg), flags); 490 if (!nlh) 491 return -EMSGSIZE; 492 493 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 494 495 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 496 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 497 nlmsg_cancel(skb, nlh); 498 return -EMSGSIZE; 499 } 500 501 return nlmsg_end(skb, nlh); 502 } 503 504 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 505 { 506 struct net *net = sock_net(skb->sk); 507 struct ip6addrlbl_entry *p; 508 struct hlist_node *pos; 509 int idx = 0, s_idx = cb->args[0]; 510 int err; 511 512 rcu_read_lock(); 513 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 514 if (idx >= s_idx && 515 net_eq(ip6addrlbl_net(p), net)) { 516 if ((err = ip6addrlbl_fill(skb, p, 517 ip6addrlbl_table.seq, 518 NETLINK_CB(cb->skb).portid, 519 cb->nlh->nlmsg_seq, 520 RTM_NEWADDRLABEL, 521 NLM_F_MULTI)) <= 0) 522 break; 523 } 524 idx++; 525 } 526 rcu_read_unlock(); 527 cb->args[0] = idx; 528 return skb->len; 529 } 530 531 static inline int ip6addrlbl_msgsize(void) 532 { 533 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 534 + nla_total_size(16) /* IFAL_ADDRESS */ 535 + nla_total_size(4); /* IFAL_LABEL */ 536 } 537 538 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 539 void *arg) 540 { 541 struct net *net = sock_net(in_skb->sk); 542 struct ifaddrlblmsg *ifal; 543 struct nlattr *tb[IFAL_MAX+1]; 544 struct in6_addr *addr; 545 u32 lseq; 546 int err = 0; 547 struct ip6addrlbl_entry *p; 548 struct sk_buff *skb; 549 550 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 551 if (err < 0) 552 return err; 553 554 ifal = nlmsg_data(nlh); 555 556 if (ifal->ifal_family != AF_INET6 || 557 ifal->ifal_prefixlen != 128) 558 return -EINVAL; 559 560 if (ifal->ifal_index && 561 !__dev_get_by_index(net, ifal->ifal_index)) 562 return -EINVAL; 563 564 if (!tb[IFAL_ADDRESS]) 565 return -EINVAL; 566 567 addr = nla_data(tb[IFAL_ADDRESS]); 568 if (!addr) 569 return -EINVAL; 570 571 rcu_read_lock(); 572 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 573 if (p && ip6addrlbl_hold(p)) 574 p = NULL; 575 lseq = ip6addrlbl_table.seq; 576 rcu_read_unlock(); 577 578 if (!p) { 579 err = -ESRCH; 580 goto out; 581 } 582 583 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { 584 ip6addrlbl_put(p); 585 return -ENOBUFS; 586 } 587 588 err = ip6addrlbl_fill(skb, p, lseq, 589 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 590 RTM_NEWADDRLABEL, 0); 591 592 ip6addrlbl_put(p); 593 594 if (err < 0) { 595 WARN_ON(err == -EMSGSIZE); 596 kfree_skb(skb); 597 goto out; 598 } 599 600 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 601 out: 602 return err; 603 } 604 605 void __init ipv6_addr_label_rtnl_register(void) 606 { 607 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 608 NULL, NULL); 609 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 610 NULL, NULL); 611 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 612 ip6addrlbl_dump, NULL); 613 } 614 615