1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 22 #if 0 23 #define ADDRLABEL(x...) printk(x) 24 #else 25 #define ADDRLABEL(x...) do { ; } while(0) 26 #endif 27 28 /* 29 * Policy Table 30 */ 31 struct ip6addrlbl_entry 32 { 33 #ifdef CONFIG_NET_NS 34 struct net *lbl_net; 35 #endif 36 struct in6_addr prefix; 37 int prefixlen; 38 int ifindex; 39 int addrtype; 40 u32 label; 41 struct hlist_node list; 42 atomic_t refcnt; 43 struct rcu_head rcu; 44 }; 45 46 static struct ip6addrlbl_table 47 { 48 struct hlist_head head; 49 spinlock_t lock; 50 u32 seq; 51 } ip6addrlbl_table; 52 53 static inline 54 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 55 { 56 return read_pnet(&lbl->lbl_net); 57 } 58 59 /* 60 * Default policy table (RFC6724 + extensions) 61 * 62 * prefix addr_type label 63 * ------------------------------------------------------------------------- 64 * ::1/128 LOOPBACK 0 65 * ::/0 N/A 1 66 * 2002::/16 N/A 2 67 * ::/96 COMPATv4 3 68 * ::ffff:0:0/96 V4MAPPED 4 69 * fc00::/7 N/A 5 ULA (RFC 4193) 70 * 2001::/32 N/A 6 Teredo (RFC 4380) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 72 * fec0::/10 N/A 11 Site-local 73 * (deprecated by RFC3879) 74 * 3ffe::/16 N/A 12 6bone 75 * 76 * Note: 0xffffffff is used if we do not have any policies. 77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 78 */ 79 80 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 81 82 static const __net_initconst struct ip6addrlbl_init_table 83 { 84 const struct in6_addr *prefix; 85 int prefixlen; 86 u32 label; 87 } ip6addrlbl_init_table[] = { 88 { /* ::/0 */ 89 .prefix = &in6addr_any, 90 .label = 1, 91 },{ /* fc00::/7 */ 92 .prefix = &(struct in6_addr){{{ 0xfc }}}, 93 .prefixlen = 7, 94 .label = 5, 95 },{ /* fec0::/10 */ 96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, 97 .prefixlen = 10, 98 .label = 11, 99 },{ /* 2002::/16 */ 100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 101 .prefixlen = 16, 102 .label = 2, 103 },{ /* 3ffe::/16 */ 104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, 105 .prefixlen = 16, 106 .label = 12, 107 },{ /* 2001::/32 */ 108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 109 .prefixlen = 32, 110 .label = 6, 111 },{ /* 2001:10::/28 */ 112 .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, 113 .prefixlen = 28, 114 .label = 7, 115 },{ /* ::ffff:0:0 */ 116 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, 117 .prefixlen = 96, 118 .label = 4, 119 },{ /* ::/96 */ 120 .prefix = &in6addr_any, 121 .prefixlen = 96, 122 .label = 3, 123 },{ /* ::1/128 */ 124 .prefix = &in6addr_loopback, 125 .prefixlen = 128, 126 .label = 0, 127 } 128 }; 129 130 /* Object management */ 131 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 132 { 133 #ifdef CONFIG_NET_NS 134 release_net(p->lbl_net); 135 #endif 136 kfree(p); 137 } 138 139 static void ip6addrlbl_free_rcu(struct rcu_head *h) 140 { 141 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 142 } 143 144 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 145 { 146 return atomic_inc_not_zero(&p->refcnt); 147 } 148 149 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 150 { 151 if (atomic_dec_and_test(&p->refcnt)) 152 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 153 } 154 155 /* Find label */ 156 static bool __ip6addrlbl_match(struct net *net, 157 const struct ip6addrlbl_entry *p, 158 const struct in6_addr *addr, 159 int addrtype, int ifindex) 160 { 161 if (!net_eq(ip6addrlbl_net(p), net)) 162 return false; 163 if (p->ifindex && p->ifindex != ifindex) 164 return false; 165 if (p->addrtype && p->addrtype != addrtype) 166 return false; 167 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 168 return false; 169 return true; 170 } 171 172 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 173 const struct in6_addr *addr, 174 int type, int ifindex) 175 { 176 struct ip6addrlbl_entry *p; 177 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 178 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 179 return p; 180 } 181 return NULL; 182 } 183 184 u32 ipv6_addr_label(struct net *net, 185 const struct in6_addr *addr, int type, int ifindex) 186 { 187 u32 label; 188 struct ip6addrlbl_entry *p; 189 190 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 191 192 rcu_read_lock(); 193 p = __ipv6_addr_label(net, addr, type, ifindex); 194 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 195 rcu_read_unlock(); 196 197 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 198 __func__, addr, type, ifindex, label); 199 200 return label; 201 } 202 203 /* allocate one entry */ 204 static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 205 const struct in6_addr *prefix, 206 int prefixlen, int ifindex, 207 u32 label) 208 { 209 struct ip6addrlbl_entry *newp; 210 int addrtype; 211 212 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 213 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 214 215 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 216 217 switch (addrtype) { 218 case IPV6_ADDR_MAPPED: 219 if (prefixlen > 96) 220 return ERR_PTR(-EINVAL); 221 if (prefixlen < 96) 222 addrtype = 0; 223 break; 224 case IPV6_ADDR_COMPATv4: 225 if (prefixlen != 96) 226 addrtype = 0; 227 break; 228 case IPV6_ADDR_LOOPBACK: 229 if (prefixlen != 128) 230 addrtype = 0; 231 break; 232 } 233 234 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 235 if (!newp) 236 return ERR_PTR(-ENOMEM); 237 238 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 239 newp->prefixlen = prefixlen; 240 newp->ifindex = ifindex; 241 newp->addrtype = addrtype; 242 newp->label = label; 243 INIT_HLIST_NODE(&newp->list); 244 #ifdef CONFIG_NET_NS 245 newp->lbl_net = hold_net(net); 246 #endif 247 atomic_set(&newp->refcnt, 1); 248 return newp; 249 } 250 251 /* add a label */ 252 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 253 { 254 struct hlist_node *n; 255 struct ip6addrlbl_entry *last = NULL, *p = NULL; 256 int ret = 0; 257 258 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 259 replace); 260 261 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 262 if (p->prefixlen == newp->prefixlen && 263 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 264 p->ifindex == newp->ifindex && 265 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 266 if (!replace) { 267 ret = -EEXIST; 268 goto out; 269 } 270 hlist_replace_rcu(&p->list, &newp->list); 271 ip6addrlbl_put(p); 272 goto out; 273 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 274 (p->prefixlen < newp->prefixlen)) { 275 hlist_add_before_rcu(&newp->list, &p->list); 276 goto out; 277 } 278 last = p; 279 } 280 if (last) 281 hlist_add_after_rcu(&last->list, &newp->list); 282 else 283 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 284 out: 285 if (!ret) 286 ip6addrlbl_table.seq++; 287 return ret; 288 } 289 290 /* add a label */ 291 static int ip6addrlbl_add(struct net *net, 292 const struct in6_addr *prefix, int prefixlen, 293 int ifindex, u32 label, int replace) 294 { 295 struct ip6addrlbl_entry *newp; 296 int ret = 0; 297 298 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 299 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 300 replace); 301 302 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 303 if (IS_ERR(newp)) 304 return PTR_ERR(newp); 305 spin_lock(&ip6addrlbl_table.lock); 306 ret = __ip6addrlbl_add(newp, replace); 307 spin_unlock(&ip6addrlbl_table.lock); 308 if (ret) 309 ip6addrlbl_free(newp); 310 return ret; 311 } 312 313 /* remove a label */ 314 static int __ip6addrlbl_del(struct net *net, 315 const struct in6_addr *prefix, int prefixlen, 316 int ifindex) 317 { 318 struct ip6addrlbl_entry *p = NULL; 319 struct hlist_node *n; 320 int ret = -ESRCH; 321 322 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 323 __func__, prefix, prefixlen, ifindex); 324 325 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 326 if (p->prefixlen == prefixlen && 327 net_eq(ip6addrlbl_net(p), net) && 328 p->ifindex == ifindex && 329 ipv6_addr_equal(&p->prefix, prefix)) { 330 hlist_del_rcu(&p->list); 331 ip6addrlbl_put(p); 332 ret = 0; 333 break; 334 } 335 } 336 return ret; 337 } 338 339 static int ip6addrlbl_del(struct net *net, 340 const struct in6_addr *prefix, int prefixlen, 341 int ifindex) 342 { 343 struct in6_addr prefix_buf; 344 int ret; 345 346 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 347 __func__, prefix, prefixlen, ifindex); 348 349 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 350 spin_lock(&ip6addrlbl_table.lock); 351 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 352 spin_unlock(&ip6addrlbl_table.lock); 353 return ret; 354 } 355 356 /* add default label */ 357 static int __net_init ip6addrlbl_net_init(struct net *net) 358 { 359 int err = 0; 360 int i; 361 362 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 363 364 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 365 int ret = ip6addrlbl_add(net, 366 ip6addrlbl_init_table[i].prefix, 367 ip6addrlbl_init_table[i].prefixlen, 368 0, 369 ip6addrlbl_init_table[i].label, 0); 370 /* XXX: should we free all rules when we catch an error? */ 371 if (ret && (!err || err != -ENOMEM)) 372 err = ret; 373 } 374 return err; 375 } 376 377 static void __net_exit ip6addrlbl_net_exit(struct net *net) 378 { 379 struct ip6addrlbl_entry *p = NULL; 380 struct hlist_node *n; 381 382 /* Remove all labels belonging to the exiting net */ 383 spin_lock(&ip6addrlbl_table.lock); 384 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 385 if (net_eq(ip6addrlbl_net(p), net)) { 386 hlist_del_rcu(&p->list); 387 ip6addrlbl_put(p); 388 } 389 } 390 spin_unlock(&ip6addrlbl_table.lock); 391 } 392 393 static struct pernet_operations ipv6_addr_label_ops = { 394 .init = ip6addrlbl_net_init, 395 .exit = ip6addrlbl_net_exit, 396 }; 397 398 int __init ipv6_addr_label_init(void) 399 { 400 spin_lock_init(&ip6addrlbl_table.lock); 401 402 return register_pernet_subsys(&ipv6_addr_label_ops); 403 } 404 405 void ipv6_addr_label_cleanup(void) 406 { 407 unregister_pernet_subsys(&ipv6_addr_label_ops); 408 } 409 410 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 411 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 412 [IFAL_LABEL] = { .len = sizeof(u32), }, 413 }; 414 415 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) 416 { 417 struct net *net = sock_net(skb->sk); 418 struct ifaddrlblmsg *ifal; 419 struct nlattr *tb[IFAL_MAX+1]; 420 struct in6_addr *pfx; 421 u32 label; 422 int err = 0; 423 424 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 425 if (err < 0) 426 return err; 427 428 ifal = nlmsg_data(nlh); 429 430 if (ifal->ifal_family != AF_INET6 || 431 ifal->ifal_prefixlen > 128) 432 return -EINVAL; 433 434 if (!tb[IFAL_ADDRESS]) 435 return -EINVAL; 436 pfx = nla_data(tb[IFAL_ADDRESS]); 437 438 if (!tb[IFAL_LABEL]) 439 return -EINVAL; 440 label = nla_get_u32(tb[IFAL_LABEL]); 441 if (label == IPV6_ADDR_LABEL_DEFAULT) 442 return -EINVAL; 443 444 switch(nlh->nlmsg_type) { 445 case RTM_NEWADDRLABEL: 446 if (ifal->ifal_index && 447 !__dev_get_by_index(net, ifal->ifal_index)) 448 return -EINVAL; 449 450 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 451 ifal->ifal_index, label, 452 nlh->nlmsg_flags & NLM_F_REPLACE); 453 break; 454 case RTM_DELADDRLABEL: 455 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 456 ifal->ifal_index); 457 break; 458 default: 459 err = -EOPNOTSUPP; 460 } 461 return err; 462 } 463 464 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 465 int prefixlen, int ifindex, u32 lseq) 466 { 467 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 468 ifal->ifal_family = AF_INET6; 469 ifal->ifal_prefixlen = prefixlen; 470 ifal->ifal_flags = 0; 471 ifal->ifal_index = ifindex; 472 ifal->ifal_seq = lseq; 473 }; 474 475 static int ip6addrlbl_fill(struct sk_buff *skb, 476 struct ip6addrlbl_entry *p, 477 u32 lseq, 478 u32 portid, u32 seq, int event, 479 unsigned int flags) 480 { 481 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 482 sizeof(struct ifaddrlblmsg), flags); 483 if (!nlh) 484 return -EMSGSIZE; 485 486 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 487 488 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 489 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 490 nlmsg_cancel(skb, nlh); 491 return -EMSGSIZE; 492 } 493 494 return nlmsg_end(skb, nlh); 495 } 496 497 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 498 { 499 struct net *net = sock_net(skb->sk); 500 struct ip6addrlbl_entry *p; 501 int idx = 0, s_idx = cb->args[0]; 502 int err; 503 504 rcu_read_lock(); 505 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 506 if (idx >= s_idx && 507 net_eq(ip6addrlbl_net(p), net)) { 508 if ((err = ip6addrlbl_fill(skb, p, 509 ip6addrlbl_table.seq, 510 NETLINK_CB(cb->skb).portid, 511 cb->nlh->nlmsg_seq, 512 RTM_NEWADDRLABEL, 513 NLM_F_MULTI)) <= 0) 514 break; 515 } 516 idx++; 517 } 518 rcu_read_unlock(); 519 cb->args[0] = idx; 520 return skb->len; 521 } 522 523 static inline int ip6addrlbl_msgsize(void) 524 { 525 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 526 + nla_total_size(16) /* IFAL_ADDRESS */ 527 + nla_total_size(4); /* IFAL_LABEL */ 528 } 529 530 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) 531 { 532 struct net *net = sock_net(in_skb->sk); 533 struct ifaddrlblmsg *ifal; 534 struct nlattr *tb[IFAL_MAX+1]; 535 struct in6_addr *addr; 536 u32 lseq; 537 int err = 0; 538 struct ip6addrlbl_entry *p; 539 struct sk_buff *skb; 540 541 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 542 if (err < 0) 543 return err; 544 545 ifal = nlmsg_data(nlh); 546 547 if (ifal->ifal_family != AF_INET6 || 548 ifal->ifal_prefixlen != 128) 549 return -EINVAL; 550 551 if (ifal->ifal_index && 552 !__dev_get_by_index(net, ifal->ifal_index)) 553 return -EINVAL; 554 555 if (!tb[IFAL_ADDRESS]) 556 return -EINVAL; 557 addr = nla_data(tb[IFAL_ADDRESS]); 558 559 rcu_read_lock(); 560 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 561 if (p && ip6addrlbl_hold(p)) 562 p = NULL; 563 lseq = ip6addrlbl_table.seq; 564 rcu_read_unlock(); 565 566 if (!p) { 567 err = -ESRCH; 568 goto out; 569 } 570 571 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { 572 ip6addrlbl_put(p); 573 return -ENOBUFS; 574 } 575 576 err = ip6addrlbl_fill(skb, p, lseq, 577 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 578 RTM_NEWADDRLABEL, 0); 579 580 ip6addrlbl_put(p); 581 582 if (err < 0) { 583 WARN_ON(err == -EMSGSIZE); 584 kfree_skb(skb); 585 goto out; 586 } 587 588 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 589 out: 590 return err; 591 } 592 593 void __init ipv6_addr_label_rtnl_register(void) 594 { 595 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 596 NULL, NULL); 597 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 598 NULL, NULL); 599 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 600 ip6addrlbl_dump, NULL); 601 } 602 603