1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 #include <linux/refcount.h> 22 23 #if 0 24 #define ADDRLABEL(x...) printk(x) 25 #else 26 #define ADDRLABEL(x...) do { ; } while (0) 27 #endif 28 29 /* 30 * Policy Table 31 */ 32 struct ip6addrlbl_entry { 33 struct in6_addr prefix; 34 int prefixlen; 35 int ifindex; 36 int addrtype; 37 u32 label; 38 struct hlist_node list; 39 refcount_t refcnt; 40 struct rcu_head rcu; 41 }; 42 43 /* 44 * Default policy table (RFC6724 + extensions) 45 * 46 * prefix addr_type label 47 * ------------------------------------------------------------------------- 48 * ::1/128 LOOPBACK 0 49 * ::/0 N/A 1 50 * 2002::/16 N/A 2 51 * ::/96 COMPATv4 3 52 * ::ffff:0:0/96 V4MAPPED 4 53 * fc00::/7 N/A 5 ULA (RFC 4193) 54 * 2001::/32 N/A 6 Teredo (RFC 4380) 55 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 56 * fec0::/10 N/A 11 Site-local 57 * (deprecated by RFC3879) 58 * 3ffe::/16 N/A 12 6bone 59 * 60 * Note: 0xffffffff is used if we do not have any policies. 61 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 62 */ 63 64 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 65 66 static const __net_initconst struct ip6addrlbl_init_table 67 { 68 const struct in6_addr *prefix; 69 int prefixlen; 70 u32 label; 71 } ip6addrlbl_init_table[] = { 72 { /* ::/0 */ 73 .prefix = &in6addr_any, 74 .label = 1, 75 }, { /* fc00::/7 */ 76 .prefix = &(struct in6_addr){ { { 0xfc } } } , 77 .prefixlen = 7, 78 .label = 5, 79 }, { /* fec0::/10 */ 80 .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, 81 .prefixlen = 10, 82 .label = 11, 83 }, { /* 2002::/16 */ 84 .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, 85 .prefixlen = 16, 86 .label = 2, 87 }, { /* 3ffe::/16 */ 88 .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, 89 .prefixlen = 16, 90 .label = 12, 91 }, { /* 2001::/32 */ 92 .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, 93 .prefixlen = 32, 94 .label = 6, 95 }, { /* 2001:10::/28 */ 96 .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, 97 .prefixlen = 28, 98 .label = 7, 99 }, { /* ::ffff:0:0 */ 100 .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, 101 .prefixlen = 96, 102 .label = 4, 103 }, { /* ::/96 */ 104 .prefix = &in6addr_any, 105 .prefixlen = 96, 106 .label = 3, 107 }, { /* ::1/128 */ 108 .prefix = &in6addr_loopback, 109 .prefixlen = 128, 110 .label = 0, 111 } 112 }; 113 114 /* Object management */ 115 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 116 { 117 kfree(p); 118 } 119 120 static void ip6addrlbl_free_rcu(struct rcu_head *h) 121 { 122 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 123 } 124 125 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 126 { 127 return refcount_inc_not_zero(&p->refcnt); 128 } 129 130 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 131 { 132 if (refcount_dec_and_test(&p->refcnt)) 133 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 134 } 135 136 /* Find label */ 137 static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p, 138 const struct in6_addr *addr, 139 int addrtype, int ifindex) 140 { 141 if (p->ifindex && p->ifindex != ifindex) 142 return false; 143 if (p->addrtype && p->addrtype != addrtype) 144 return false; 145 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 146 return false; 147 return true; 148 } 149 150 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 151 const struct in6_addr *addr, 152 int type, int ifindex) 153 { 154 struct ip6addrlbl_entry *p; 155 156 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { 157 if (__ip6addrlbl_match(p, addr, type, ifindex)) 158 return p; 159 } 160 return NULL; 161 } 162 163 u32 ipv6_addr_label(struct net *net, 164 const struct in6_addr *addr, int type, int ifindex) 165 { 166 u32 label; 167 struct ip6addrlbl_entry *p; 168 169 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 170 171 rcu_read_lock(); 172 p = __ipv6_addr_label(net, addr, type, ifindex); 173 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 174 rcu_read_unlock(); 175 176 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 177 __func__, addr, type, ifindex, label); 178 179 return label; 180 } 181 182 /* allocate one entry */ 183 static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, 184 int prefixlen, int ifindex, 185 u32 label) 186 { 187 struct ip6addrlbl_entry *newp; 188 int addrtype; 189 190 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 191 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 192 193 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 194 195 switch (addrtype) { 196 case IPV6_ADDR_MAPPED: 197 if (prefixlen > 96) 198 return ERR_PTR(-EINVAL); 199 if (prefixlen < 96) 200 addrtype = 0; 201 break; 202 case IPV6_ADDR_COMPATv4: 203 if (prefixlen != 96) 204 addrtype = 0; 205 break; 206 case IPV6_ADDR_LOOPBACK: 207 if (prefixlen != 128) 208 addrtype = 0; 209 break; 210 } 211 212 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 213 if (!newp) 214 return ERR_PTR(-ENOMEM); 215 216 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 217 newp->prefixlen = prefixlen; 218 newp->ifindex = ifindex; 219 newp->addrtype = addrtype; 220 newp->label = label; 221 INIT_HLIST_NODE(&newp->list); 222 refcount_set(&newp->refcnt, 1); 223 return newp; 224 } 225 226 /* add a label */ 227 static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, 228 int replace) 229 { 230 struct ip6addrlbl_entry *last = NULL, *p = NULL; 231 struct hlist_node *n; 232 int ret = 0; 233 234 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 235 replace); 236 237 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { 238 if (p->prefixlen == newp->prefixlen && 239 p->ifindex == newp->ifindex && 240 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 241 if (!replace) { 242 ret = -EEXIST; 243 goto out; 244 } 245 hlist_replace_rcu(&p->list, &newp->list); 246 ip6addrlbl_put(p); 247 goto out; 248 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 249 (p->prefixlen < newp->prefixlen)) { 250 hlist_add_before_rcu(&newp->list, &p->list); 251 goto out; 252 } 253 last = p; 254 } 255 if (last) 256 hlist_add_behind_rcu(&newp->list, &last->list); 257 else 258 hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head); 259 out: 260 if (!ret) 261 net->ipv6.ip6addrlbl_table.seq++; 262 return ret; 263 } 264 265 /* add a label */ 266 static int ip6addrlbl_add(struct net *net, 267 const struct in6_addr *prefix, int prefixlen, 268 int ifindex, u32 label, int replace) 269 { 270 struct ip6addrlbl_entry *newp; 271 int ret = 0; 272 273 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 274 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 275 replace); 276 277 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); 278 if (IS_ERR(newp)) 279 return PTR_ERR(newp); 280 spin_lock(&net->ipv6.ip6addrlbl_table.lock); 281 ret = __ip6addrlbl_add(net, newp, replace); 282 spin_unlock(&net->ipv6.ip6addrlbl_table.lock); 283 if (ret) 284 ip6addrlbl_free(newp); 285 return ret; 286 } 287 288 /* remove a label */ 289 static int __ip6addrlbl_del(struct net *net, 290 const struct in6_addr *prefix, int prefixlen, 291 int ifindex) 292 { 293 struct ip6addrlbl_entry *p = NULL; 294 struct hlist_node *n; 295 int ret = -ESRCH; 296 297 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 298 __func__, prefix, prefixlen, ifindex); 299 300 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { 301 if (p->prefixlen == prefixlen && 302 p->ifindex == ifindex && 303 ipv6_addr_equal(&p->prefix, prefix)) { 304 hlist_del_rcu(&p->list); 305 ip6addrlbl_put(p); 306 ret = 0; 307 break; 308 } 309 } 310 return ret; 311 } 312 313 static int ip6addrlbl_del(struct net *net, 314 const struct in6_addr *prefix, int prefixlen, 315 int ifindex) 316 { 317 struct in6_addr prefix_buf; 318 int ret; 319 320 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 321 __func__, prefix, prefixlen, ifindex); 322 323 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 324 spin_lock(&net->ipv6.ip6addrlbl_table.lock); 325 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 326 spin_unlock(&net->ipv6.ip6addrlbl_table.lock); 327 return ret; 328 } 329 330 /* add default label */ 331 static int __net_init ip6addrlbl_net_init(struct net *net) 332 { 333 int err = 0; 334 int i; 335 336 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 337 338 spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); 339 INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); 340 341 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 342 int ret = ip6addrlbl_add(net, 343 ip6addrlbl_init_table[i].prefix, 344 ip6addrlbl_init_table[i].prefixlen, 345 0, 346 ip6addrlbl_init_table[i].label, 0); 347 /* XXX: should we free all rules when we catch an error? */ 348 if (ret && (!err || err != -ENOMEM)) 349 err = ret; 350 } 351 return err; 352 } 353 354 static void __net_exit ip6addrlbl_net_exit(struct net *net) 355 { 356 struct ip6addrlbl_entry *p = NULL; 357 struct hlist_node *n; 358 359 /* Remove all labels belonging to the exiting net */ 360 spin_lock(&net->ipv6.ip6addrlbl_table.lock); 361 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { 362 hlist_del_rcu(&p->list); 363 ip6addrlbl_put(p); 364 } 365 spin_unlock(&net->ipv6.ip6addrlbl_table.lock); 366 } 367 368 static struct pernet_operations ipv6_addr_label_ops = { 369 .init = ip6addrlbl_net_init, 370 .exit = ip6addrlbl_net_exit, 371 }; 372 373 int __init ipv6_addr_label_init(void) 374 { 375 return register_pernet_subsys(&ipv6_addr_label_ops); 376 } 377 378 void ipv6_addr_label_cleanup(void) 379 { 380 unregister_pernet_subsys(&ipv6_addr_label_ops); 381 } 382 383 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 384 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 385 [IFAL_LABEL] = { .len = sizeof(u32), }, 386 }; 387 388 static bool addrlbl_ifindex_exists(struct net *net, int ifindex) 389 { 390 391 struct net_device *dev; 392 393 rcu_read_lock(); 394 dev = dev_get_by_index_rcu(net, ifindex); 395 rcu_read_unlock(); 396 397 return dev != NULL; 398 } 399 400 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 401 struct netlink_ext_ack *extack) 402 { 403 struct net *net = sock_net(skb->sk); 404 struct ifaddrlblmsg *ifal; 405 struct nlattr *tb[IFAL_MAX+1]; 406 struct in6_addr *pfx; 407 u32 label; 408 int err = 0; 409 410 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 411 extack); 412 if (err < 0) 413 return err; 414 415 ifal = nlmsg_data(nlh); 416 417 if (ifal->ifal_family != AF_INET6 || 418 ifal->ifal_prefixlen > 128) 419 return -EINVAL; 420 421 if (!tb[IFAL_ADDRESS]) 422 return -EINVAL; 423 pfx = nla_data(tb[IFAL_ADDRESS]); 424 425 if (!tb[IFAL_LABEL]) 426 return -EINVAL; 427 label = nla_get_u32(tb[IFAL_LABEL]); 428 if (label == IPV6_ADDR_LABEL_DEFAULT) 429 return -EINVAL; 430 431 switch (nlh->nlmsg_type) { 432 case RTM_NEWADDRLABEL: 433 if (ifal->ifal_index && 434 !addrlbl_ifindex_exists(net, ifal->ifal_index)) 435 return -EINVAL; 436 437 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 438 ifal->ifal_index, label, 439 nlh->nlmsg_flags & NLM_F_REPLACE); 440 break; 441 case RTM_DELADDRLABEL: 442 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 443 ifal->ifal_index); 444 break; 445 default: 446 err = -EOPNOTSUPP; 447 } 448 return err; 449 } 450 451 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 452 int prefixlen, int ifindex, u32 lseq) 453 { 454 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 455 ifal->ifal_family = AF_INET6; 456 ifal->ifal_prefixlen = prefixlen; 457 ifal->ifal_flags = 0; 458 ifal->ifal_index = ifindex; 459 ifal->ifal_seq = lseq; 460 }; 461 462 static int ip6addrlbl_fill(struct sk_buff *skb, 463 struct ip6addrlbl_entry *p, 464 u32 lseq, 465 u32 portid, u32 seq, int event, 466 unsigned int flags) 467 { 468 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 469 sizeof(struct ifaddrlblmsg), flags); 470 if (!nlh) 471 return -EMSGSIZE; 472 473 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 474 475 if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || 476 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 477 nlmsg_cancel(skb, nlh); 478 return -EMSGSIZE; 479 } 480 481 nlmsg_end(skb, nlh); 482 return 0; 483 } 484 485 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 486 { 487 struct net *net = sock_net(skb->sk); 488 struct ip6addrlbl_entry *p; 489 int idx = 0, s_idx = cb->args[0]; 490 int err; 491 492 rcu_read_lock(); 493 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { 494 if (idx >= s_idx) { 495 err = ip6addrlbl_fill(skb, p, 496 net->ipv6.ip6addrlbl_table.seq, 497 NETLINK_CB(cb->skb).portid, 498 cb->nlh->nlmsg_seq, 499 RTM_NEWADDRLABEL, 500 NLM_F_MULTI); 501 if (err < 0) 502 break; 503 } 504 idx++; 505 } 506 rcu_read_unlock(); 507 cb->args[0] = idx; 508 return skb->len; 509 } 510 511 static inline int ip6addrlbl_msgsize(void) 512 { 513 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 514 + nla_total_size(16) /* IFAL_ADDRESS */ 515 + nla_total_size(4); /* IFAL_LABEL */ 516 } 517 518 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 519 struct netlink_ext_ack *extack) 520 { 521 struct net *net = sock_net(in_skb->sk); 522 struct ifaddrlblmsg *ifal; 523 struct nlattr *tb[IFAL_MAX+1]; 524 struct in6_addr *addr; 525 u32 lseq; 526 int err = 0; 527 struct ip6addrlbl_entry *p; 528 struct sk_buff *skb; 529 530 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 531 extack); 532 if (err < 0) 533 return err; 534 535 ifal = nlmsg_data(nlh); 536 537 if (ifal->ifal_family != AF_INET6 || 538 ifal->ifal_prefixlen != 128) 539 return -EINVAL; 540 541 if (ifal->ifal_index && 542 !addrlbl_ifindex_exists(net, ifal->ifal_index)) 543 return -EINVAL; 544 545 if (!tb[IFAL_ADDRESS]) 546 return -EINVAL; 547 addr = nla_data(tb[IFAL_ADDRESS]); 548 549 rcu_read_lock(); 550 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 551 if (p && !ip6addrlbl_hold(p)) 552 p = NULL; 553 lseq = net->ipv6.ip6addrlbl_table.seq; 554 rcu_read_unlock(); 555 556 if (!p) { 557 err = -ESRCH; 558 goto out; 559 } 560 561 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 562 if (!skb) { 563 ip6addrlbl_put(p); 564 return -ENOBUFS; 565 } 566 567 err = ip6addrlbl_fill(skb, p, lseq, 568 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 569 RTM_NEWADDRLABEL, 0); 570 571 ip6addrlbl_put(p); 572 573 if (err < 0) { 574 WARN_ON(err == -EMSGSIZE); 575 kfree_skb(skb); 576 goto out; 577 } 578 579 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 580 out: 581 return err; 582 } 583 584 void __init ipv6_addr_label_rtnl_register(void) 585 { 586 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 587 NULL, RTNL_FLAG_DOIT_UNLOCKED); 588 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 589 NULL, RTNL_FLAG_DOIT_UNLOCKED); 590 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 591 ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); 592 } 593 594