1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 22 #if 0 23 #define ADDRLABEL(x...) printk(x) 24 #else 25 #define ADDRLABEL(x...) do { ; } while(0) 26 #endif 27 28 /* 29 * Policy Table 30 */ 31 struct ip6addrlbl_entry 32 { 33 #ifdef CONFIG_NET_NS 34 struct net *lbl_net; 35 #endif 36 struct in6_addr prefix; 37 int prefixlen; 38 int ifindex; 39 int addrtype; 40 u32 label; 41 struct hlist_node list; 42 atomic_t refcnt; 43 struct rcu_head rcu; 44 }; 45 46 static struct ip6addrlbl_table 47 { 48 struct hlist_head head; 49 spinlock_t lock; 50 u32 seq; 51 } ip6addrlbl_table; 52 53 static inline 54 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 55 { 56 return read_pnet(&lbl->lbl_net); 57 } 58 59 /* 60 * Default policy table (RFC6724 + extensions) 61 * 62 * prefix addr_type label 63 * ------------------------------------------------------------------------- 64 * ::1/128 LOOPBACK 0 65 * ::/0 N/A 1 66 * 2002::/16 N/A 2 67 * ::/96 COMPATv4 3 68 * ::ffff:0:0/96 V4MAPPED 4 69 * fc00::/7 N/A 5 ULA (RFC 4193) 70 * 2001::/32 N/A 6 Teredo (RFC 4380) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 72 * fec0::/10 N/A 11 Site-local 73 * (deprecated by RFC3879) 74 * 3ffe::/16 N/A 12 6bone 75 * 76 * Note: 0xffffffff is used if we do not have any policies. 77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 78 */ 79 80 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 81 82 static const __net_initconst struct ip6addrlbl_init_table 83 { 84 const struct in6_addr *prefix; 85 int prefixlen; 86 u32 label; 87 } ip6addrlbl_init_table[] = { 88 { /* ::/0 */ 89 .prefix = &in6addr_any, 90 .label = 1, 91 },{ /* fc00::/7 */ 92 .prefix = &(struct in6_addr){{{ 0xfc }}}, 93 .prefixlen = 7, 94 .label = 5, 95 },{ /* fec0::/10 */ 96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, 97 .prefixlen = 10, 98 .label = 11, 99 },{ /* 2002::/16 */ 100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 101 .prefixlen = 16, 102 .label = 2, 103 },{ /* 3ffe::/16 */ 104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, 105 .prefixlen = 16, 106 .label = 12, 107 },{ /* 2001::/32 */ 108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 109 .prefixlen = 32, 110 .label = 6, 111 },{ /* 2001:10::/28 */ 112 .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, 113 .prefixlen = 28, 114 .label = 7, 115 },{ /* ::ffff:0:0 */ 116 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, 117 .prefixlen = 96, 118 .label = 4, 119 },{ /* ::/96 */ 120 .prefix = &in6addr_any, 121 .prefixlen = 96, 122 .label = 3, 123 },{ /* ::1/128 */ 124 .prefix = &in6addr_loopback, 125 .prefixlen = 128, 126 .label = 0, 127 } 128 }; 129 130 /* Object management */ 131 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 132 { 133 #ifdef CONFIG_NET_NS 134 release_net(p->lbl_net); 135 #endif 136 kfree(p); 137 } 138 139 static void ip6addrlbl_free_rcu(struct rcu_head *h) 140 { 141 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 142 } 143 144 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 145 { 146 return atomic_inc_not_zero(&p->refcnt); 147 } 148 149 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 150 { 151 if (atomic_dec_and_test(&p->refcnt)) 152 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 153 } 154 155 /* Find label */ 156 static bool __ip6addrlbl_match(struct net *net, 157 const struct ip6addrlbl_entry *p, 158 const struct in6_addr *addr, 159 int addrtype, int ifindex) 160 { 161 if (!net_eq(ip6addrlbl_net(p), net)) 162 return false; 163 if (p->ifindex && p->ifindex != ifindex) 164 return false; 165 if (p->addrtype && p->addrtype != addrtype) 166 return false; 167 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 168 return false; 169 return true; 170 } 171 172 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 173 const struct in6_addr *addr, 174 int type, int ifindex) 175 { 176 struct ip6addrlbl_entry *p; 177 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 178 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 179 return p; 180 } 181 return NULL; 182 } 183 184 u32 ipv6_addr_label(struct net *net, 185 const struct in6_addr *addr, int type, int ifindex) 186 { 187 u32 label; 188 struct ip6addrlbl_entry *p; 189 190 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 191 192 rcu_read_lock(); 193 p = __ipv6_addr_label(net, addr, type, ifindex); 194 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 195 rcu_read_unlock(); 196 197 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 198 __func__, addr, type, ifindex, label); 199 200 return label; 201 } 202 203 /* allocate one entry */ 204 static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 205 const struct in6_addr *prefix, 206 int prefixlen, int ifindex, 207 u32 label) 208 { 209 struct ip6addrlbl_entry *newp; 210 int addrtype; 211 212 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 213 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 214 215 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 216 217 switch (addrtype) { 218 case IPV6_ADDR_MAPPED: 219 if (prefixlen > 96) 220 return ERR_PTR(-EINVAL); 221 if (prefixlen < 96) 222 addrtype = 0; 223 break; 224 case IPV6_ADDR_COMPATv4: 225 if (prefixlen != 96) 226 addrtype = 0; 227 break; 228 case IPV6_ADDR_LOOPBACK: 229 if (prefixlen != 128) 230 addrtype = 0; 231 break; 232 } 233 234 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 235 if (!newp) 236 return ERR_PTR(-ENOMEM); 237 238 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 239 newp->prefixlen = prefixlen; 240 newp->ifindex = ifindex; 241 newp->addrtype = addrtype; 242 newp->label = label; 243 INIT_HLIST_NODE(&newp->list); 244 #ifdef CONFIG_NET_NS 245 newp->lbl_net = hold_net(net); 246 #endif 247 atomic_set(&newp->refcnt, 1); 248 return newp; 249 } 250 251 /* add a label */ 252 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 253 { 254 int ret = 0; 255 256 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 257 __func__, 258 newp, replace); 259 260 if (hlist_empty(&ip6addrlbl_table.head)) { 261 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 262 } else { 263 struct hlist_node *n; 264 struct ip6addrlbl_entry *p = NULL; 265 hlist_for_each_entry_safe(p, n, 266 &ip6addrlbl_table.head, list) { 267 if (p->prefixlen == newp->prefixlen && 268 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 269 p->ifindex == newp->ifindex && 270 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 271 if (!replace) { 272 ret = -EEXIST; 273 goto out; 274 } 275 hlist_replace_rcu(&p->list, &newp->list); 276 ip6addrlbl_put(p); 277 goto out; 278 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 279 (p->prefixlen < newp->prefixlen)) { 280 hlist_add_before_rcu(&newp->list, &p->list); 281 goto out; 282 } 283 } 284 hlist_add_after_rcu(&p->list, &newp->list); 285 } 286 out: 287 if (!ret) 288 ip6addrlbl_table.seq++; 289 return ret; 290 } 291 292 /* add a label */ 293 static int ip6addrlbl_add(struct net *net, 294 const struct in6_addr *prefix, int prefixlen, 295 int ifindex, u32 label, int replace) 296 { 297 struct ip6addrlbl_entry *newp; 298 int ret = 0; 299 300 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 301 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 302 replace); 303 304 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 305 if (IS_ERR(newp)) 306 return PTR_ERR(newp); 307 spin_lock(&ip6addrlbl_table.lock); 308 ret = __ip6addrlbl_add(newp, replace); 309 spin_unlock(&ip6addrlbl_table.lock); 310 if (ret) 311 ip6addrlbl_free(newp); 312 return ret; 313 } 314 315 /* remove a label */ 316 static int __ip6addrlbl_del(struct net *net, 317 const struct in6_addr *prefix, int prefixlen, 318 int ifindex) 319 { 320 struct ip6addrlbl_entry *p = NULL; 321 struct hlist_node *n; 322 int ret = -ESRCH; 323 324 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 325 __func__, prefix, prefixlen, ifindex); 326 327 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 328 if (p->prefixlen == prefixlen && 329 net_eq(ip6addrlbl_net(p), net) && 330 p->ifindex == ifindex && 331 ipv6_addr_equal(&p->prefix, prefix)) { 332 hlist_del_rcu(&p->list); 333 ip6addrlbl_put(p); 334 ret = 0; 335 break; 336 } 337 } 338 return ret; 339 } 340 341 static int ip6addrlbl_del(struct net *net, 342 const struct in6_addr *prefix, int prefixlen, 343 int ifindex) 344 { 345 struct in6_addr prefix_buf; 346 int ret; 347 348 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 349 __func__, prefix, prefixlen, ifindex); 350 351 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 352 spin_lock(&ip6addrlbl_table.lock); 353 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 354 spin_unlock(&ip6addrlbl_table.lock); 355 return ret; 356 } 357 358 /* add default label */ 359 static int __net_init ip6addrlbl_net_init(struct net *net) 360 { 361 int err = 0; 362 int i; 363 364 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 365 366 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 367 int ret = ip6addrlbl_add(net, 368 ip6addrlbl_init_table[i].prefix, 369 ip6addrlbl_init_table[i].prefixlen, 370 0, 371 ip6addrlbl_init_table[i].label, 0); 372 /* XXX: should we free all rules when we catch an error? */ 373 if (ret && (!err || err != -ENOMEM)) 374 err = ret; 375 } 376 return err; 377 } 378 379 static void __net_exit ip6addrlbl_net_exit(struct net *net) 380 { 381 struct ip6addrlbl_entry *p = NULL; 382 struct hlist_node *n; 383 384 /* Remove all labels belonging to the exiting net */ 385 spin_lock(&ip6addrlbl_table.lock); 386 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 387 if (net_eq(ip6addrlbl_net(p), net)) { 388 hlist_del_rcu(&p->list); 389 ip6addrlbl_put(p); 390 } 391 } 392 spin_unlock(&ip6addrlbl_table.lock); 393 } 394 395 static struct pernet_operations ipv6_addr_label_ops = { 396 .init = ip6addrlbl_net_init, 397 .exit = ip6addrlbl_net_exit, 398 }; 399 400 int __init ipv6_addr_label_init(void) 401 { 402 spin_lock_init(&ip6addrlbl_table.lock); 403 404 return register_pernet_subsys(&ipv6_addr_label_ops); 405 } 406 407 void ipv6_addr_label_cleanup(void) 408 { 409 unregister_pernet_subsys(&ipv6_addr_label_ops); 410 } 411 412 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 413 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 414 [IFAL_LABEL] = { .len = sizeof(u32), }, 415 }; 416 417 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) 418 { 419 struct net *net = sock_net(skb->sk); 420 struct ifaddrlblmsg *ifal; 421 struct nlattr *tb[IFAL_MAX+1]; 422 struct in6_addr *pfx; 423 u32 label; 424 int err = 0; 425 426 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 427 if (err < 0) 428 return err; 429 430 ifal = nlmsg_data(nlh); 431 432 if (ifal->ifal_family != AF_INET6 || 433 ifal->ifal_prefixlen > 128) 434 return -EINVAL; 435 436 if (!tb[IFAL_ADDRESS]) 437 return -EINVAL; 438 pfx = nla_data(tb[IFAL_ADDRESS]); 439 440 if (!tb[IFAL_LABEL]) 441 return -EINVAL; 442 label = nla_get_u32(tb[IFAL_LABEL]); 443 if (label == IPV6_ADDR_LABEL_DEFAULT) 444 return -EINVAL; 445 446 switch(nlh->nlmsg_type) { 447 case RTM_NEWADDRLABEL: 448 if (ifal->ifal_index && 449 !__dev_get_by_index(net, ifal->ifal_index)) 450 return -EINVAL; 451 452 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 453 ifal->ifal_index, label, 454 nlh->nlmsg_flags & NLM_F_REPLACE); 455 break; 456 case RTM_DELADDRLABEL: 457 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 458 ifal->ifal_index); 459 break; 460 default: 461 err = -EOPNOTSUPP; 462 } 463 return err; 464 } 465 466 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 467 int prefixlen, int ifindex, u32 lseq) 468 { 469 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 470 ifal->ifal_family = AF_INET6; 471 ifal->ifal_prefixlen = prefixlen; 472 ifal->ifal_flags = 0; 473 ifal->ifal_index = ifindex; 474 ifal->ifal_seq = lseq; 475 }; 476 477 static int ip6addrlbl_fill(struct sk_buff *skb, 478 struct ip6addrlbl_entry *p, 479 u32 lseq, 480 u32 portid, u32 seq, int event, 481 unsigned int flags) 482 { 483 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 484 sizeof(struct ifaddrlblmsg), flags); 485 if (!nlh) 486 return -EMSGSIZE; 487 488 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 489 490 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 491 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 492 nlmsg_cancel(skb, nlh); 493 return -EMSGSIZE; 494 } 495 496 return nlmsg_end(skb, nlh); 497 } 498 499 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 500 { 501 struct net *net = sock_net(skb->sk); 502 struct ip6addrlbl_entry *p; 503 int idx = 0, s_idx = cb->args[0]; 504 int err; 505 506 rcu_read_lock(); 507 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 508 if (idx >= s_idx && 509 net_eq(ip6addrlbl_net(p), net)) { 510 if ((err = ip6addrlbl_fill(skb, p, 511 ip6addrlbl_table.seq, 512 NETLINK_CB(cb->skb).portid, 513 cb->nlh->nlmsg_seq, 514 RTM_NEWADDRLABEL, 515 NLM_F_MULTI)) <= 0) 516 break; 517 } 518 idx++; 519 } 520 rcu_read_unlock(); 521 cb->args[0] = idx; 522 return skb->len; 523 } 524 525 static inline int ip6addrlbl_msgsize(void) 526 { 527 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 528 + nla_total_size(16) /* IFAL_ADDRESS */ 529 + nla_total_size(4); /* IFAL_LABEL */ 530 } 531 532 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) 533 { 534 struct net *net = sock_net(in_skb->sk); 535 struct ifaddrlblmsg *ifal; 536 struct nlattr *tb[IFAL_MAX+1]; 537 struct in6_addr *addr; 538 u32 lseq; 539 int err = 0; 540 struct ip6addrlbl_entry *p; 541 struct sk_buff *skb; 542 543 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 544 if (err < 0) 545 return err; 546 547 ifal = nlmsg_data(nlh); 548 549 if (ifal->ifal_family != AF_INET6 || 550 ifal->ifal_prefixlen != 128) 551 return -EINVAL; 552 553 if (ifal->ifal_index && 554 !__dev_get_by_index(net, ifal->ifal_index)) 555 return -EINVAL; 556 557 if (!tb[IFAL_ADDRESS]) 558 return -EINVAL; 559 addr = nla_data(tb[IFAL_ADDRESS]); 560 561 rcu_read_lock(); 562 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 563 if (p && ip6addrlbl_hold(p)) 564 p = NULL; 565 lseq = ip6addrlbl_table.seq; 566 rcu_read_unlock(); 567 568 if (!p) { 569 err = -ESRCH; 570 goto out; 571 } 572 573 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { 574 ip6addrlbl_put(p); 575 return -ENOBUFS; 576 } 577 578 err = ip6addrlbl_fill(skb, p, lseq, 579 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 580 RTM_NEWADDRLABEL, 0); 581 582 ip6addrlbl_put(p); 583 584 if (err < 0) { 585 WARN_ON(err == -EMSGSIZE); 586 kfree_skb(skb); 587 goto out; 588 } 589 590 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 591 out: 592 return err; 593 } 594 595 void __init ipv6_addr_label_rtnl_register(void) 596 { 597 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 598 NULL, NULL); 599 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 600 NULL, NULL); 601 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 602 ip6addrlbl_dump, NULL); 603 } 604 605