1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 22 #if 0 23 #define ADDRLABEL(x...) printk(x) 24 #else 25 #define ADDRLABEL(x...) do { ; } while (0) 26 #endif 27 28 /* 29 * Policy Table 30 */ 31 struct ip6addrlbl_entry { 32 #ifdef CONFIG_NET_NS 33 struct net *lbl_net; 34 #endif 35 struct in6_addr prefix; 36 int prefixlen; 37 int ifindex; 38 int addrtype; 39 u32 label; 40 struct hlist_node list; 41 atomic_t refcnt; 42 struct rcu_head rcu; 43 }; 44 45 static struct ip6addrlbl_table 46 { 47 struct hlist_head head; 48 spinlock_t lock; 49 u32 seq; 50 } ip6addrlbl_table; 51 52 static inline 53 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 54 { 55 return read_pnet(&lbl->lbl_net); 56 } 57 58 /* 59 * Default policy table (RFC6724 + extensions) 60 * 61 * prefix addr_type label 62 * ------------------------------------------------------------------------- 63 * ::1/128 LOOPBACK 0 64 * ::/0 N/A 1 65 * 2002::/16 N/A 2 66 * ::/96 COMPATv4 3 67 * ::ffff:0:0/96 V4MAPPED 4 68 * fc00::/7 N/A 5 ULA (RFC 4193) 69 * 2001::/32 N/A 6 Teredo (RFC 4380) 70 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 71 * fec0::/10 N/A 11 Site-local 72 * (deprecated by RFC3879) 73 * 3ffe::/16 N/A 12 6bone 74 * 75 * Note: 0xffffffff is used if we do not have any policies. 76 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 77 */ 78 79 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 80 81 static const __net_initconst struct ip6addrlbl_init_table 82 { 83 const struct in6_addr *prefix; 84 int prefixlen; 85 u32 label; 86 } ip6addrlbl_init_table[] = { 87 { /* ::/0 */ 88 .prefix = &in6addr_any, 89 .label = 1, 90 }, { /* fc00::/7 */ 91 .prefix = &(struct in6_addr){ { { 0xfc } } } , 92 .prefixlen = 7, 93 .label = 5, 94 }, { /* fec0::/10 */ 95 .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, 96 .prefixlen = 10, 97 .label = 11, 98 }, { /* 2002::/16 */ 99 .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, 100 .prefixlen = 16, 101 .label = 2, 102 }, { /* 3ffe::/16 */ 103 .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, 104 .prefixlen = 16, 105 .label = 12, 106 }, { /* 2001::/32 */ 107 .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, 108 .prefixlen = 32, 109 .label = 6, 110 }, { /* 2001:10::/28 */ 111 .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, 112 .prefixlen = 28, 113 .label = 7, 114 }, { /* ::ffff:0:0 */ 115 .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, 116 .prefixlen = 96, 117 .label = 4, 118 }, { /* ::/96 */ 119 .prefix = &in6addr_any, 120 .prefixlen = 96, 121 .label = 3, 122 }, { /* ::1/128 */ 123 .prefix = &in6addr_loopback, 124 .prefixlen = 128, 125 .label = 0, 126 } 127 }; 128 129 /* Object management */ 130 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 131 { 132 #ifdef CONFIG_NET_NS 133 release_net(p->lbl_net); 134 #endif 135 kfree(p); 136 } 137 138 static void ip6addrlbl_free_rcu(struct rcu_head *h) 139 { 140 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 141 } 142 143 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 144 { 145 return atomic_inc_not_zero(&p->refcnt); 146 } 147 148 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 149 { 150 if (atomic_dec_and_test(&p->refcnt)) 151 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 152 } 153 154 /* Find label */ 155 static bool __ip6addrlbl_match(struct net *net, 156 const struct ip6addrlbl_entry *p, 157 const struct in6_addr *addr, 158 int addrtype, int ifindex) 159 { 160 if (!net_eq(ip6addrlbl_net(p), net)) 161 return false; 162 if (p->ifindex && p->ifindex != ifindex) 163 return false; 164 if (p->addrtype && p->addrtype != addrtype) 165 return false; 166 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 167 return false; 168 return true; 169 } 170 171 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 172 const struct in6_addr *addr, 173 int type, int ifindex) 174 { 175 struct ip6addrlbl_entry *p; 176 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 177 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 178 return p; 179 } 180 return NULL; 181 } 182 183 u32 ipv6_addr_label(struct net *net, 184 const struct in6_addr *addr, int type, int ifindex) 185 { 186 u32 label; 187 struct ip6addrlbl_entry *p; 188 189 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 190 191 rcu_read_lock(); 192 p = __ipv6_addr_label(net, addr, type, ifindex); 193 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 194 rcu_read_unlock(); 195 196 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 197 __func__, addr, type, ifindex, label); 198 199 return label; 200 } 201 202 /* allocate one entry */ 203 static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 204 const struct in6_addr *prefix, 205 int prefixlen, int ifindex, 206 u32 label) 207 { 208 struct ip6addrlbl_entry *newp; 209 int addrtype; 210 211 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 212 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 213 214 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 215 216 switch (addrtype) { 217 case IPV6_ADDR_MAPPED: 218 if (prefixlen > 96) 219 return ERR_PTR(-EINVAL); 220 if (prefixlen < 96) 221 addrtype = 0; 222 break; 223 case IPV6_ADDR_COMPATv4: 224 if (prefixlen != 96) 225 addrtype = 0; 226 break; 227 case IPV6_ADDR_LOOPBACK: 228 if (prefixlen != 128) 229 addrtype = 0; 230 break; 231 } 232 233 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 234 if (!newp) 235 return ERR_PTR(-ENOMEM); 236 237 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 238 newp->prefixlen = prefixlen; 239 newp->ifindex = ifindex; 240 newp->addrtype = addrtype; 241 newp->label = label; 242 INIT_HLIST_NODE(&newp->list); 243 #ifdef CONFIG_NET_NS 244 newp->lbl_net = hold_net(net); 245 #endif 246 atomic_set(&newp->refcnt, 1); 247 return newp; 248 } 249 250 /* add a label */ 251 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 252 { 253 struct hlist_node *n; 254 struct ip6addrlbl_entry *last = NULL, *p = NULL; 255 int ret = 0; 256 257 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 258 replace); 259 260 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 261 if (p->prefixlen == newp->prefixlen && 262 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 263 p->ifindex == newp->ifindex && 264 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 265 if (!replace) { 266 ret = -EEXIST; 267 goto out; 268 } 269 hlist_replace_rcu(&p->list, &newp->list); 270 ip6addrlbl_put(p); 271 goto out; 272 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 273 (p->prefixlen < newp->prefixlen)) { 274 hlist_add_before_rcu(&newp->list, &p->list); 275 goto out; 276 } 277 last = p; 278 } 279 if (last) 280 hlist_add_behind_rcu(&newp->list, &last->list); 281 else 282 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 283 out: 284 if (!ret) 285 ip6addrlbl_table.seq++; 286 return ret; 287 } 288 289 /* add a label */ 290 static int ip6addrlbl_add(struct net *net, 291 const struct in6_addr *prefix, int prefixlen, 292 int ifindex, u32 label, int replace) 293 { 294 struct ip6addrlbl_entry *newp; 295 int ret = 0; 296 297 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 298 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 299 replace); 300 301 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 302 if (IS_ERR(newp)) 303 return PTR_ERR(newp); 304 spin_lock(&ip6addrlbl_table.lock); 305 ret = __ip6addrlbl_add(newp, replace); 306 spin_unlock(&ip6addrlbl_table.lock); 307 if (ret) 308 ip6addrlbl_free(newp); 309 return ret; 310 } 311 312 /* remove a label */ 313 static int __ip6addrlbl_del(struct net *net, 314 const struct in6_addr *prefix, int prefixlen, 315 int ifindex) 316 { 317 struct ip6addrlbl_entry *p = NULL; 318 struct hlist_node *n; 319 int ret = -ESRCH; 320 321 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 322 __func__, prefix, prefixlen, ifindex); 323 324 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 325 if (p->prefixlen == prefixlen && 326 net_eq(ip6addrlbl_net(p), net) && 327 p->ifindex == ifindex && 328 ipv6_addr_equal(&p->prefix, prefix)) { 329 hlist_del_rcu(&p->list); 330 ip6addrlbl_put(p); 331 ret = 0; 332 break; 333 } 334 } 335 return ret; 336 } 337 338 static int ip6addrlbl_del(struct net *net, 339 const struct in6_addr *prefix, int prefixlen, 340 int ifindex) 341 { 342 struct in6_addr prefix_buf; 343 int ret; 344 345 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 346 __func__, prefix, prefixlen, ifindex); 347 348 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 349 spin_lock(&ip6addrlbl_table.lock); 350 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 351 spin_unlock(&ip6addrlbl_table.lock); 352 return ret; 353 } 354 355 /* add default label */ 356 static int __net_init ip6addrlbl_net_init(struct net *net) 357 { 358 int err = 0; 359 int i; 360 361 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 362 363 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 364 int ret = ip6addrlbl_add(net, 365 ip6addrlbl_init_table[i].prefix, 366 ip6addrlbl_init_table[i].prefixlen, 367 0, 368 ip6addrlbl_init_table[i].label, 0); 369 /* XXX: should we free all rules when we catch an error? */ 370 if (ret && (!err || err != -ENOMEM)) 371 err = ret; 372 } 373 return err; 374 } 375 376 static void __net_exit ip6addrlbl_net_exit(struct net *net) 377 { 378 struct ip6addrlbl_entry *p = NULL; 379 struct hlist_node *n; 380 381 /* Remove all labels belonging to the exiting net */ 382 spin_lock(&ip6addrlbl_table.lock); 383 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 384 if (net_eq(ip6addrlbl_net(p), net)) { 385 hlist_del_rcu(&p->list); 386 ip6addrlbl_put(p); 387 } 388 } 389 spin_unlock(&ip6addrlbl_table.lock); 390 } 391 392 static struct pernet_operations ipv6_addr_label_ops = { 393 .init = ip6addrlbl_net_init, 394 .exit = ip6addrlbl_net_exit, 395 }; 396 397 int __init ipv6_addr_label_init(void) 398 { 399 spin_lock_init(&ip6addrlbl_table.lock); 400 401 return register_pernet_subsys(&ipv6_addr_label_ops); 402 } 403 404 void ipv6_addr_label_cleanup(void) 405 { 406 unregister_pernet_subsys(&ipv6_addr_label_ops); 407 } 408 409 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 410 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 411 [IFAL_LABEL] = { .len = sizeof(u32), }, 412 }; 413 414 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) 415 { 416 struct net *net = sock_net(skb->sk); 417 struct ifaddrlblmsg *ifal; 418 struct nlattr *tb[IFAL_MAX+1]; 419 struct in6_addr *pfx; 420 u32 label; 421 int err = 0; 422 423 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 424 if (err < 0) 425 return err; 426 427 ifal = nlmsg_data(nlh); 428 429 if (ifal->ifal_family != AF_INET6 || 430 ifal->ifal_prefixlen > 128) 431 return -EINVAL; 432 433 if (!tb[IFAL_ADDRESS]) 434 return -EINVAL; 435 pfx = nla_data(tb[IFAL_ADDRESS]); 436 437 if (!tb[IFAL_LABEL]) 438 return -EINVAL; 439 label = nla_get_u32(tb[IFAL_LABEL]); 440 if (label == IPV6_ADDR_LABEL_DEFAULT) 441 return -EINVAL; 442 443 switch (nlh->nlmsg_type) { 444 case RTM_NEWADDRLABEL: 445 if (ifal->ifal_index && 446 !__dev_get_by_index(net, ifal->ifal_index)) 447 return -EINVAL; 448 449 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 450 ifal->ifal_index, label, 451 nlh->nlmsg_flags & NLM_F_REPLACE); 452 break; 453 case RTM_DELADDRLABEL: 454 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 455 ifal->ifal_index); 456 break; 457 default: 458 err = -EOPNOTSUPP; 459 } 460 return err; 461 } 462 463 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 464 int prefixlen, int ifindex, u32 lseq) 465 { 466 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 467 ifal->ifal_family = AF_INET6; 468 ifal->ifal_prefixlen = prefixlen; 469 ifal->ifal_flags = 0; 470 ifal->ifal_index = ifindex; 471 ifal->ifal_seq = lseq; 472 }; 473 474 static int ip6addrlbl_fill(struct sk_buff *skb, 475 struct ip6addrlbl_entry *p, 476 u32 lseq, 477 u32 portid, u32 seq, int event, 478 unsigned int flags) 479 { 480 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 481 sizeof(struct ifaddrlblmsg), flags); 482 if (!nlh) 483 return -EMSGSIZE; 484 485 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 486 487 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 488 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 489 nlmsg_cancel(skb, nlh); 490 return -EMSGSIZE; 491 } 492 493 return nlmsg_end(skb, nlh); 494 } 495 496 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 497 { 498 struct net *net = sock_net(skb->sk); 499 struct ip6addrlbl_entry *p; 500 int idx = 0, s_idx = cb->args[0]; 501 int err; 502 503 rcu_read_lock(); 504 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 505 if (idx >= s_idx && 506 net_eq(ip6addrlbl_net(p), net)) { 507 err = ip6addrlbl_fill(skb, p, 508 ip6addrlbl_table.seq, 509 NETLINK_CB(cb->skb).portid, 510 cb->nlh->nlmsg_seq, 511 RTM_NEWADDRLABEL, 512 NLM_F_MULTI); 513 if (err <= 0) 514 break; 515 } 516 idx++; 517 } 518 rcu_read_unlock(); 519 cb->args[0] = idx; 520 return skb->len; 521 } 522 523 static inline int ip6addrlbl_msgsize(void) 524 { 525 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 526 + nla_total_size(16) /* IFAL_ADDRESS */ 527 + nla_total_size(4); /* IFAL_LABEL */ 528 } 529 530 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) 531 { 532 struct net *net = sock_net(in_skb->sk); 533 struct ifaddrlblmsg *ifal; 534 struct nlattr *tb[IFAL_MAX+1]; 535 struct in6_addr *addr; 536 u32 lseq; 537 int err = 0; 538 struct ip6addrlbl_entry *p; 539 struct sk_buff *skb; 540 541 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 542 if (err < 0) 543 return err; 544 545 ifal = nlmsg_data(nlh); 546 547 if (ifal->ifal_family != AF_INET6 || 548 ifal->ifal_prefixlen != 128) 549 return -EINVAL; 550 551 if (ifal->ifal_index && 552 !__dev_get_by_index(net, ifal->ifal_index)) 553 return -EINVAL; 554 555 if (!tb[IFAL_ADDRESS]) 556 return -EINVAL; 557 addr = nla_data(tb[IFAL_ADDRESS]); 558 559 rcu_read_lock(); 560 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 561 if (p && ip6addrlbl_hold(p)) 562 p = NULL; 563 lseq = ip6addrlbl_table.seq; 564 rcu_read_unlock(); 565 566 if (!p) { 567 err = -ESRCH; 568 goto out; 569 } 570 571 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 572 if (!skb) { 573 ip6addrlbl_put(p); 574 return -ENOBUFS; 575 } 576 577 err = ip6addrlbl_fill(skb, p, lseq, 578 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 579 RTM_NEWADDRLABEL, 0); 580 581 ip6addrlbl_put(p); 582 583 if (err < 0) { 584 WARN_ON(err == -EMSGSIZE); 585 kfree_skb(skb); 586 goto out; 587 } 588 589 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 590 out: 591 return err; 592 } 593 594 void __init ipv6_addr_label_rtnl_register(void) 595 { 596 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 597 NULL, NULL); 598 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 599 NULL, NULL); 600 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 601 ip6addrlbl_dump, NULL); 602 } 603 604