1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 #include <linux/refcount.h> 22 23 #if 0 24 #define ADDRLABEL(x...) printk(x) 25 #else 26 #define ADDRLABEL(x...) do { ; } while (0) 27 #endif 28 29 /* 30 * Policy Table 31 */ 32 struct ip6addrlbl_entry { 33 possible_net_t lbl_net; 34 struct in6_addr prefix; 35 int prefixlen; 36 int ifindex; 37 int addrtype; 38 u32 label; 39 struct hlist_node list; 40 refcount_t refcnt; 41 struct rcu_head rcu; 42 }; 43 44 static struct ip6addrlbl_table 45 { 46 struct hlist_head head; 47 spinlock_t lock; 48 u32 seq; 49 } ip6addrlbl_table; 50 51 static inline 52 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 53 { 54 return read_pnet(&lbl->lbl_net); 55 } 56 57 /* 58 * Default policy table (RFC6724 + extensions) 59 * 60 * prefix addr_type label 61 * ------------------------------------------------------------------------- 62 * ::1/128 LOOPBACK 0 63 * ::/0 N/A 1 64 * 2002::/16 N/A 2 65 * ::/96 COMPATv4 3 66 * ::ffff:0:0/96 V4MAPPED 4 67 * fc00::/7 N/A 5 ULA (RFC 4193) 68 * 2001::/32 N/A 6 Teredo (RFC 4380) 69 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 70 * fec0::/10 N/A 11 Site-local 71 * (deprecated by RFC3879) 72 * 3ffe::/16 N/A 12 6bone 73 * 74 * Note: 0xffffffff is used if we do not have any policies. 75 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 76 */ 77 78 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 79 80 static const __net_initconst struct ip6addrlbl_init_table 81 { 82 const struct in6_addr *prefix; 83 int prefixlen; 84 u32 label; 85 } ip6addrlbl_init_table[] = { 86 { /* ::/0 */ 87 .prefix = &in6addr_any, 88 .label = 1, 89 }, { /* fc00::/7 */ 90 .prefix = &(struct in6_addr){ { { 0xfc } } } , 91 .prefixlen = 7, 92 .label = 5, 93 }, { /* fec0::/10 */ 94 .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, 95 .prefixlen = 10, 96 .label = 11, 97 }, { /* 2002::/16 */ 98 .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, 99 .prefixlen = 16, 100 .label = 2, 101 }, { /* 3ffe::/16 */ 102 .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, 103 .prefixlen = 16, 104 .label = 12, 105 }, { /* 2001::/32 */ 106 .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, 107 .prefixlen = 32, 108 .label = 6, 109 }, { /* 2001:10::/28 */ 110 .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, 111 .prefixlen = 28, 112 .label = 7, 113 }, { /* ::ffff:0:0 */ 114 .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, 115 .prefixlen = 96, 116 .label = 4, 117 }, { /* ::/96 */ 118 .prefix = &in6addr_any, 119 .prefixlen = 96, 120 .label = 3, 121 }, { /* ::1/128 */ 122 .prefix = &in6addr_loopback, 123 .prefixlen = 128, 124 .label = 0, 125 } 126 }; 127 128 /* Object management */ 129 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 130 { 131 kfree(p); 132 } 133 134 static void ip6addrlbl_free_rcu(struct rcu_head *h) 135 { 136 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 137 } 138 139 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 140 { 141 return refcount_inc_not_zero(&p->refcnt); 142 } 143 144 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 145 { 146 if (refcount_dec_and_test(&p->refcnt)) 147 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 148 } 149 150 /* Find label */ 151 static bool __ip6addrlbl_match(struct net *net, 152 const struct ip6addrlbl_entry *p, 153 const struct in6_addr *addr, 154 int addrtype, int ifindex) 155 { 156 if (!net_eq(ip6addrlbl_net(p), net)) 157 return false; 158 if (p->ifindex && p->ifindex != ifindex) 159 return false; 160 if (p->addrtype && p->addrtype != addrtype) 161 return false; 162 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 163 return false; 164 return true; 165 } 166 167 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 168 const struct in6_addr *addr, 169 int type, int ifindex) 170 { 171 struct ip6addrlbl_entry *p; 172 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 173 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 174 return p; 175 } 176 return NULL; 177 } 178 179 u32 ipv6_addr_label(struct net *net, 180 const struct in6_addr *addr, int type, int ifindex) 181 { 182 u32 label; 183 struct ip6addrlbl_entry *p; 184 185 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 186 187 rcu_read_lock(); 188 p = __ipv6_addr_label(net, addr, type, ifindex); 189 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 190 rcu_read_unlock(); 191 192 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 193 __func__, addr, type, ifindex, label); 194 195 return label; 196 } 197 198 /* allocate one entry */ 199 static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 200 const struct in6_addr *prefix, 201 int prefixlen, int ifindex, 202 u32 label) 203 { 204 struct ip6addrlbl_entry *newp; 205 int addrtype; 206 207 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 208 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 209 210 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 211 212 switch (addrtype) { 213 case IPV6_ADDR_MAPPED: 214 if (prefixlen > 96) 215 return ERR_PTR(-EINVAL); 216 if (prefixlen < 96) 217 addrtype = 0; 218 break; 219 case IPV6_ADDR_COMPATv4: 220 if (prefixlen != 96) 221 addrtype = 0; 222 break; 223 case IPV6_ADDR_LOOPBACK: 224 if (prefixlen != 128) 225 addrtype = 0; 226 break; 227 } 228 229 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 230 if (!newp) 231 return ERR_PTR(-ENOMEM); 232 233 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 234 newp->prefixlen = prefixlen; 235 newp->ifindex = ifindex; 236 newp->addrtype = addrtype; 237 newp->label = label; 238 INIT_HLIST_NODE(&newp->list); 239 write_pnet(&newp->lbl_net, net); 240 refcount_set(&newp->refcnt, 1); 241 return newp; 242 } 243 244 /* add a label */ 245 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 246 { 247 struct hlist_node *n; 248 struct ip6addrlbl_entry *last = NULL, *p = NULL; 249 int ret = 0; 250 251 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 252 replace); 253 254 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 255 if (p->prefixlen == newp->prefixlen && 256 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 257 p->ifindex == newp->ifindex && 258 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 259 if (!replace) { 260 ret = -EEXIST; 261 goto out; 262 } 263 hlist_replace_rcu(&p->list, &newp->list); 264 ip6addrlbl_put(p); 265 goto out; 266 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 267 (p->prefixlen < newp->prefixlen)) { 268 hlist_add_before_rcu(&newp->list, &p->list); 269 goto out; 270 } 271 last = p; 272 } 273 if (last) 274 hlist_add_behind_rcu(&newp->list, &last->list); 275 else 276 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 277 out: 278 if (!ret) 279 ip6addrlbl_table.seq++; 280 return ret; 281 } 282 283 /* add a label */ 284 static int ip6addrlbl_add(struct net *net, 285 const struct in6_addr *prefix, int prefixlen, 286 int ifindex, u32 label, int replace) 287 { 288 struct ip6addrlbl_entry *newp; 289 int ret = 0; 290 291 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 292 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 293 replace); 294 295 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 296 if (IS_ERR(newp)) 297 return PTR_ERR(newp); 298 spin_lock(&ip6addrlbl_table.lock); 299 ret = __ip6addrlbl_add(newp, replace); 300 spin_unlock(&ip6addrlbl_table.lock); 301 if (ret) 302 ip6addrlbl_free(newp); 303 return ret; 304 } 305 306 /* remove a label */ 307 static int __ip6addrlbl_del(struct net *net, 308 const struct in6_addr *prefix, int prefixlen, 309 int ifindex) 310 { 311 struct ip6addrlbl_entry *p = NULL; 312 struct hlist_node *n; 313 int ret = -ESRCH; 314 315 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 316 __func__, prefix, prefixlen, ifindex); 317 318 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 319 if (p->prefixlen == prefixlen && 320 net_eq(ip6addrlbl_net(p), net) && 321 p->ifindex == ifindex && 322 ipv6_addr_equal(&p->prefix, prefix)) { 323 hlist_del_rcu(&p->list); 324 ip6addrlbl_put(p); 325 ret = 0; 326 break; 327 } 328 } 329 return ret; 330 } 331 332 static int ip6addrlbl_del(struct net *net, 333 const struct in6_addr *prefix, int prefixlen, 334 int ifindex) 335 { 336 struct in6_addr prefix_buf; 337 int ret; 338 339 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 340 __func__, prefix, prefixlen, ifindex); 341 342 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 343 spin_lock(&ip6addrlbl_table.lock); 344 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 345 spin_unlock(&ip6addrlbl_table.lock); 346 return ret; 347 } 348 349 /* add default label */ 350 static int __net_init ip6addrlbl_net_init(struct net *net) 351 { 352 int err = 0; 353 int i; 354 355 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 356 357 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 358 int ret = ip6addrlbl_add(net, 359 ip6addrlbl_init_table[i].prefix, 360 ip6addrlbl_init_table[i].prefixlen, 361 0, 362 ip6addrlbl_init_table[i].label, 0); 363 /* XXX: should we free all rules when we catch an error? */ 364 if (ret && (!err || err != -ENOMEM)) 365 err = ret; 366 } 367 return err; 368 } 369 370 static void __net_exit ip6addrlbl_net_exit(struct net *net) 371 { 372 struct ip6addrlbl_entry *p = NULL; 373 struct hlist_node *n; 374 375 /* Remove all labels belonging to the exiting net */ 376 spin_lock(&ip6addrlbl_table.lock); 377 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 378 if (net_eq(ip6addrlbl_net(p), net)) { 379 hlist_del_rcu(&p->list); 380 ip6addrlbl_put(p); 381 } 382 } 383 spin_unlock(&ip6addrlbl_table.lock); 384 } 385 386 static struct pernet_operations ipv6_addr_label_ops = { 387 .init = ip6addrlbl_net_init, 388 .exit = ip6addrlbl_net_exit, 389 }; 390 391 int __init ipv6_addr_label_init(void) 392 { 393 spin_lock_init(&ip6addrlbl_table.lock); 394 395 return register_pernet_subsys(&ipv6_addr_label_ops); 396 } 397 398 void ipv6_addr_label_cleanup(void) 399 { 400 unregister_pernet_subsys(&ipv6_addr_label_ops); 401 } 402 403 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 404 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 405 [IFAL_LABEL] = { .len = sizeof(u32), }, 406 }; 407 408 static bool addrlbl_ifindex_exists(struct net *net, int ifindex) 409 { 410 411 struct net_device *dev; 412 413 rcu_read_lock(); 414 dev = dev_get_by_index_rcu(net, ifindex); 415 rcu_read_unlock(); 416 417 return dev != NULL; 418 } 419 420 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 421 struct netlink_ext_ack *extack) 422 { 423 struct net *net = sock_net(skb->sk); 424 struct ifaddrlblmsg *ifal; 425 struct nlattr *tb[IFAL_MAX+1]; 426 struct in6_addr *pfx; 427 u32 label; 428 int err = 0; 429 430 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 431 extack); 432 if (err < 0) 433 return err; 434 435 ifal = nlmsg_data(nlh); 436 437 if (ifal->ifal_family != AF_INET6 || 438 ifal->ifal_prefixlen > 128) 439 return -EINVAL; 440 441 if (!tb[IFAL_ADDRESS]) 442 return -EINVAL; 443 pfx = nla_data(tb[IFAL_ADDRESS]); 444 445 if (!tb[IFAL_LABEL]) 446 return -EINVAL; 447 label = nla_get_u32(tb[IFAL_LABEL]); 448 if (label == IPV6_ADDR_LABEL_DEFAULT) 449 return -EINVAL; 450 451 switch (nlh->nlmsg_type) { 452 case RTM_NEWADDRLABEL: 453 if (ifal->ifal_index && 454 !addrlbl_ifindex_exists(net, ifal->ifal_index)) 455 return -EINVAL; 456 457 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 458 ifal->ifal_index, label, 459 nlh->nlmsg_flags & NLM_F_REPLACE); 460 break; 461 case RTM_DELADDRLABEL: 462 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 463 ifal->ifal_index); 464 break; 465 default: 466 err = -EOPNOTSUPP; 467 } 468 return err; 469 } 470 471 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 472 int prefixlen, int ifindex, u32 lseq) 473 { 474 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 475 ifal->ifal_family = AF_INET6; 476 ifal->ifal_prefixlen = prefixlen; 477 ifal->ifal_flags = 0; 478 ifal->ifal_index = ifindex; 479 ifal->ifal_seq = lseq; 480 }; 481 482 static int ip6addrlbl_fill(struct sk_buff *skb, 483 struct ip6addrlbl_entry *p, 484 u32 lseq, 485 u32 portid, u32 seq, int event, 486 unsigned int flags) 487 { 488 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 489 sizeof(struct ifaddrlblmsg), flags); 490 if (!nlh) 491 return -EMSGSIZE; 492 493 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 494 495 if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || 496 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 497 nlmsg_cancel(skb, nlh); 498 return -EMSGSIZE; 499 } 500 501 nlmsg_end(skb, nlh); 502 return 0; 503 } 504 505 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 506 { 507 struct net *net = sock_net(skb->sk); 508 struct ip6addrlbl_entry *p; 509 int idx = 0, s_idx = cb->args[0]; 510 int err; 511 512 rcu_read_lock(); 513 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 514 if (idx >= s_idx && 515 net_eq(ip6addrlbl_net(p), net)) { 516 err = ip6addrlbl_fill(skb, p, 517 ip6addrlbl_table.seq, 518 NETLINK_CB(cb->skb).portid, 519 cb->nlh->nlmsg_seq, 520 RTM_NEWADDRLABEL, 521 NLM_F_MULTI); 522 if (err < 0) 523 break; 524 } 525 idx++; 526 } 527 rcu_read_unlock(); 528 cb->args[0] = idx; 529 return skb->len; 530 } 531 532 static inline int ip6addrlbl_msgsize(void) 533 { 534 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 535 + nla_total_size(16) /* IFAL_ADDRESS */ 536 + nla_total_size(4); /* IFAL_LABEL */ 537 } 538 539 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 540 struct netlink_ext_ack *extack) 541 { 542 struct net *net = sock_net(in_skb->sk); 543 struct ifaddrlblmsg *ifal; 544 struct nlattr *tb[IFAL_MAX+1]; 545 struct in6_addr *addr; 546 u32 lseq; 547 int err = 0; 548 struct ip6addrlbl_entry *p; 549 struct sk_buff *skb; 550 551 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 552 extack); 553 if (err < 0) 554 return err; 555 556 ifal = nlmsg_data(nlh); 557 558 if (ifal->ifal_family != AF_INET6 || 559 ifal->ifal_prefixlen != 128) 560 return -EINVAL; 561 562 if (ifal->ifal_index && 563 !addrlbl_ifindex_exists(net, ifal->ifal_index)) 564 return -EINVAL; 565 566 if (!tb[IFAL_ADDRESS]) 567 return -EINVAL; 568 addr = nla_data(tb[IFAL_ADDRESS]); 569 570 rcu_read_lock(); 571 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 572 if (p && !ip6addrlbl_hold(p)) 573 p = NULL; 574 lseq = ip6addrlbl_table.seq; 575 rcu_read_unlock(); 576 577 if (!p) { 578 err = -ESRCH; 579 goto out; 580 } 581 582 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 583 if (!skb) { 584 ip6addrlbl_put(p); 585 return -ENOBUFS; 586 } 587 588 err = ip6addrlbl_fill(skb, p, lseq, 589 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 590 RTM_NEWADDRLABEL, 0); 591 592 ip6addrlbl_put(p); 593 594 if (err < 0) { 595 WARN_ON(err == -EMSGSIZE); 596 kfree_skb(skb); 597 goto out; 598 } 599 600 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 601 out: 602 return err; 603 } 604 605 void __init ipv6_addr_label_rtnl_register(void) 606 { 607 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 608 NULL, RTNL_FLAG_DOIT_UNLOCKED); 609 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 610 NULL, RTNL_FLAG_DOIT_UNLOCKED); 611 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 612 ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); 613 } 614 615