1 #include <linux/types.h> 2 #include <linux/skbuff.h> 3 #include <linux/socket.h> 4 #include <linux/sysctl.h> 5 #include <linux/net.h> 6 #include <linux/module.h> 7 #include <linux/if_arp.h> 8 #include <linux/ipv6.h> 9 #include <linux/mpls.h> 10 #include <linux/vmalloc.h> 11 #include <net/ip.h> 12 #include <net/dst.h> 13 #include <net/sock.h> 14 #include <net/arp.h> 15 #include <net/ip_fib.h> 16 #include <net/netevent.h> 17 #include <net/netns/generic.h> 18 #include "internal.h" 19 20 #define LABEL_NOT_SPECIFIED (1<<20) 21 #define MAX_NEW_LABELS 2 22 23 /* This maximum ha length copied from the definition of struct neighbour */ 24 #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) 25 26 struct mpls_route { /* next hop label forwarding entry */ 27 struct net_device __rcu *rt_dev; 28 struct rcu_head rt_rcu; 29 u32 rt_label[MAX_NEW_LABELS]; 30 u8 rt_protocol; /* routing protocol that set this entry */ 31 u8 rt_labels; 32 u8 rt_via_alen; 33 u8 rt_via_table; 34 u8 rt_via[0]; 35 }; 36 37 static int zero = 0; 38 static int label_limit = (1 << 20) - 1; 39 40 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 41 struct nlmsghdr *nlh, struct net *net, u32 portid, 42 unsigned int nlm_flags); 43 44 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) 45 { 46 struct mpls_route *rt = NULL; 47 48 if (index < net->mpls.platform_labels) { 49 struct mpls_route __rcu **platform_label = 50 rcu_dereference(net->mpls.platform_label); 51 rt = rcu_dereference(platform_label[index]); 52 } 53 return rt; 54 } 55 56 static bool mpls_output_possible(const struct net_device *dev) 57 { 58 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); 59 } 60 61 static unsigned int mpls_rt_header_size(const struct mpls_route *rt) 62 { 63 /* The size of the layer 2.5 labels to be added for this route */ 64 return rt->rt_labels * sizeof(struct mpls_shim_hdr); 65 } 66 67 static unsigned int mpls_dev_mtu(const struct net_device *dev) 68 { 69 /* The amount of data the layer 2 frame can hold */ 70 return dev->mtu; 71 } 72 73 static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 74 { 75 if (skb->len <= mtu) 76 return false; 77 78 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 79 return false; 80 81 return true; 82 } 83 84 static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, 85 struct mpls_entry_decoded dec) 86 { 87 /* RFC4385 and RFC5586 encode other packets in mpls such that 88 * they don't conflict with the ip version number, making 89 * decoding by examining the ip version correct in everything 90 * except for the strangest cases. 91 * 92 * The strange cases if we choose to support them will require 93 * manual configuration. 94 */ 95 struct iphdr *hdr4; 96 bool success = true; 97 98 /* The IPv4 code below accesses through the IPv4 header 99 * checksum, which is 12 bytes into the packet. 100 * The IPv6 code below accesses through the IPv6 hop limit 101 * which is 8 bytes into the packet. 102 * 103 * For all supported cases there should always be at least 12 104 * bytes of packet data present. The IPv4 header is 20 bytes 105 * without options and the IPv6 header is always 40 bytes 106 * long. 107 */ 108 if (!pskb_may_pull(skb, 12)) 109 return false; 110 111 /* Use ip_hdr to find the ip protocol version */ 112 hdr4 = ip_hdr(skb); 113 if (hdr4->version == 4) { 114 skb->protocol = htons(ETH_P_IP); 115 csum_replace2(&hdr4->check, 116 htons(hdr4->ttl << 8), 117 htons(dec.ttl << 8)); 118 hdr4->ttl = dec.ttl; 119 } 120 else if (hdr4->version == 6) { 121 struct ipv6hdr *hdr6 = ipv6_hdr(skb); 122 skb->protocol = htons(ETH_P_IPV6); 123 hdr6->hop_limit = dec.ttl; 124 } 125 else 126 /* version 0 and version 1 are used by pseudo wires */ 127 success = false; 128 return success; 129 } 130 131 static int mpls_forward(struct sk_buff *skb, struct net_device *dev, 132 struct packet_type *pt, struct net_device *orig_dev) 133 { 134 struct net *net = dev_net(dev); 135 struct mpls_shim_hdr *hdr; 136 struct mpls_route *rt; 137 struct mpls_entry_decoded dec; 138 struct net_device *out_dev; 139 unsigned int hh_len; 140 unsigned int new_header_size; 141 unsigned int mtu; 142 int err; 143 144 /* Careful this entire function runs inside of an rcu critical section */ 145 146 if (skb->pkt_type != PACKET_HOST) 147 goto drop; 148 149 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 150 goto drop; 151 152 if (!pskb_may_pull(skb, sizeof(*hdr))) 153 goto drop; 154 155 /* Read and decode the label */ 156 hdr = mpls_hdr(skb); 157 dec = mpls_entry_decode(hdr); 158 159 /* Pop the label */ 160 skb_pull(skb, sizeof(*hdr)); 161 skb_reset_network_header(skb); 162 163 skb_orphan(skb); 164 165 rt = mpls_route_input_rcu(net, dec.label); 166 if (!rt) 167 goto drop; 168 169 /* Find the output device */ 170 out_dev = rcu_dereference(rt->rt_dev); 171 if (!mpls_output_possible(out_dev)) 172 goto drop; 173 174 if (skb_warn_if_lro(skb)) 175 goto drop; 176 177 skb_forward_csum(skb); 178 179 /* Verify ttl is valid */ 180 if (dec.ttl <= 1) 181 goto drop; 182 dec.ttl -= 1; 183 184 /* Verify the destination can hold the packet */ 185 new_header_size = mpls_rt_header_size(rt); 186 mtu = mpls_dev_mtu(out_dev); 187 if (mpls_pkt_too_big(skb, mtu - new_header_size)) 188 goto drop; 189 190 hh_len = LL_RESERVED_SPACE(out_dev); 191 if (!out_dev->header_ops) 192 hh_len = 0; 193 194 /* Ensure there is enough space for the headers in the skb */ 195 if (skb_cow(skb, hh_len + new_header_size)) 196 goto drop; 197 198 skb->dev = out_dev; 199 skb->protocol = htons(ETH_P_MPLS_UC); 200 201 if (unlikely(!new_header_size && dec.bos)) { 202 /* Penultimate hop popping */ 203 if (!mpls_egress(rt, skb, dec)) 204 goto drop; 205 } else { 206 bool bos; 207 int i; 208 skb_push(skb, new_header_size); 209 skb_reset_network_header(skb); 210 /* Push the new labels */ 211 hdr = mpls_hdr(skb); 212 bos = dec.bos; 213 for (i = rt->rt_labels - 1; i >= 0; i--) { 214 hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); 215 bos = false; 216 } 217 } 218 219 err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); 220 if (err) 221 net_dbg_ratelimited("%s: packet transmission failed: %d\n", 222 __func__, err); 223 return 0; 224 225 drop: 226 kfree_skb(skb); 227 return NET_RX_DROP; 228 } 229 230 static struct packet_type mpls_packet_type __read_mostly = { 231 .type = cpu_to_be16(ETH_P_MPLS_UC), 232 .func = mpls_forward, 233 }; 234 235 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { 236 [RTA_DST] = { .type = NLA_U32 }, 237 [RTA_OIF] = { .type = NLA_U32 }, 238 }; 239 240 struct mpls_route_config { 241 u32 rc_protocol; 242 u32 rc_ifindex; 243 u16 rc_via_table; 244 u16 rc_via_alen; 245 u8 rc_via[MAX_VIA_ALEN]; 246 u32 rc_label; 247 u32 rc_output_labels; 248 u32 rc_output_label[MAX_NEW_LABELS]; 249 u32 rc_nlflags; 250 struct nl_info rc_nlinfo; 251 }; 252 253 static struct mpls_route *mpls_rt_alloc(size_t alen) 254 { 255 struct mpls_route *rt; 256 257 rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); 258 if (rt) 259 rt->rt_via_alen = alen; 260 return rt; 261 } 262 263 static void mpls_rt_free(struct mpls_route *rt) 264 { 265 if (rt) 266 kfree_rcu(rt, rt_rcu); 267 } 268 269 static void mpls_notify_route(struct net *net, unsigned index, 270 struct mpls_route *old, struct mpls_route *new, 271 const struct nl_info *info) 272 { 273 struct nlmsghdr *nlh = info ? info->nlh : NULL; 274 unsigned portid = info ? info->portid : 0; 275 int event = new ? RTM_NEWROUTE : RTM_DELROUTE; 276 struct mpls_route *rt = new ? new : old; 277 unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0; 278 /* Ignore reserved labels for now */ 279 if (rt && (index >= 16)) 280 rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); 281 } 282 283 static void mpls_route_update(struct net *net, unsigned index, 284 struct net_device *dev, struct mpls_route *new, 285 const struct nl_info *info) 286 { 287 struct mpls_route __rcu **platform_label; 288 struct mpls_route *rt, *old = NULL; 289 290 ASSERT_RTNL(); 291 292 platform_label = rtnl_dereference(net->mpls.platform_label); 293 rt = rtnl_dereference(platform_label[index]); 294 if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { 295 rcu_assign_pointer(platform_label[index], new); 296 old = rt; 297 } 298 299 mpls_notify_route(net, index, old, new, info); 300 301 /* If we removed a route free it now */ 302 mpls_rt_free(old); 303 } 304 305 static unsigned find_free_label(struct net *net) 306 { 307 struct mpls_route __rcu **platform_label; 308 size_t platform_labels; 309 unsigned index; 310 311 platform_label = rtnl_dereference(net->mpls.platform_label); 312 platform_labels = net->mpls.platform_labels; 313 for (index = 16; index < platform_labels; index++) { 314 if (!rtnl_dereference(platform_label[index])) 315 return index; 316 } 317 return LABEL_NOT_SPECIFIED; 318 } 319 320 static int mpls_route_add(struct mpls_route_config *cfg) 321 { 322 struct mpls_route __rcu **platform_label; 323 struct net *net = cfg->rc_nlinfo.nl_net; 324 struct net_device *dev = NULL; 325 struct mpls_route *rt, *old; 326 unsigned index; 327 int i; 328 int err = -EINVAL; 329 330 index = cfg->rc_label; 331 332 /* If a label was not specified during insert pick one */ 333 if ((index == LABEL_NOT_SPECIFIED) && 334 (cfg->rc_nlflags & NLM_F_CREATE)) { 335 index = find_free_label(net); 336 } 337 338 /* The first 16 labels are reserved, and may not be set */ 339 if (index < 16) 340 goto errout; 341 342 /* The full 20 bit range may not be supported. */ 343 if (index >= net->mpls.platform_labels) 344 goto errout; 345 346 /* Ensure only a supported number of labels are present */ 347 if (cfg->rc_output_labels > MAX_NEW_LABELS) 348 goto errout; 349 350 err = -ENODEV; 351 dev = dev_get_by_index(net, cfg->rc_ifindex); 352 if (!dev) 353 goto errout; 354 355 /* For now just support ethernet devices */ 356 err = -EINVAL; 357 if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) 358 goto errout; 359 360 err = -EINVAL; 361 if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && 362 (dev->addr_len != cfg->rc_via_alen)) 363 goto errout; 364 365 /* Append makes no sense with mpls */ 366 err = -EOPNOTSUPP; 367 if (cfg->rc_nlflags & NLM_F_APPEND) 368 goto errout; 369 370 err = -EEXIST; 371 platform_label = rtnl_dereference(net->mpls.platform_label); 372 old = rtnl_dereference(platform_label[index]); 373 if ((cfg->rc_nlflags & NLM_F_EXCL) && old) 374 goto errout; 375 376 err = -EEXIST; 377 if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old) 378 goto errout; 379 380 err = -ENOENT; 381 if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) 382 goto errout; 383 384 err = -ENOMEM; 385 rt = mpls_rt_alloc(cfg->rc_via_alen); 386 if (!rt) 387 goto errout; 388 389 rt->rt_labels = cfg->rc_output_labels; 390 for (i = 0; i < rt->rt_labels; i++) 391 rt->rt_label[i] = cfg->rc_output_label[i]; 392 rt->rt_protocol = cfg->rc_protocol; 393 RCU_INIT_POINTER(rt->rt_dev, dev); 394 rt->rt_via_table = cfg->rc_via_table; 395 memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); 396 397 mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); 398 399 dev_put(dev); 400 return 0; 401 402 errout: 403 if (dev) 404 dev_put(dev); 405 return err; 406 } 407 408 static int mpls_route_del(struct mpls_route_config *cfg) 409 { 410 struct net *net = cfg->rc_nlinfo.nl_net; 411 unsigned index; 412 int err = -EINVAL; 413 414 index = cfg->rc_label; 415 416 /* The first 16 labels are reserved, and may not be removed */ 417 if (index < 16) 418 goto errout; 419 420 /* The full 20 bit range may not be supported */ 421 if (index >= net->mpls.platform_labels) 422 goto errout; 423 424 mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); 425 426 err = 0; 427 errout: 428 return err; 429 } 430 431 static void mpls_ifdown(struct net_device *dev) 432 { 433 struct mpls_route __rcu **platform_label; 434 struct net *net = dev_net(dev); 435 unsigned index; 436 437 platform_label = rtnl_dereference(net->mpls.platform_label); 438 for (index = 0; index < net->mpls.platform_labels; index++) { 439 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 440 if (!rt) 441 continue; 442 if (rtnl_dereference(rt->rt_dev) != dev) 443 continue; 444 rt->rt_dev = NULL; 445 } 446 } 447 448 static int mpls_dev_notify(struct notifier_block *this, unsigned long event, 449 void *ptr) 450 { 451 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 452 453 switch(event) { 454 case NETDEV_UNREGISTER: 455 mpls_ifdown(dev); 456 break; 457 } 458 return NOTIFY_OK; 459 } 460 461 static struct notifier_block mpls_dev_notifier = { 462 .notifier_call = mpls_dev_notify, 463 }; 464 465 static int nla_put_via(struct sk_buff *skb, 466 u8 table, const void *addr, int alen) 467 { 468 static const int table_to_family[NEIGH_NR_TABLES + 1] = { 469 AF_INET, AF_INET6, AF_DECnet, AF_PACKET, 470 }; 471 struct nlattr *nla; 472 struct rtvia *via; 473 int family = AF_UNSPEC; 474 475 nla = nla_reserve(skb, RTA_VIA, alen + 2); 476 if (!nla) 477 return -EMSGSIZE; 478 479 if (table <= NEIGH_NR_TABLES) 480 family = table_to_family[table]; 481 482 via = nla_data(nla); 483 via->rtvia_family = family; 484 memcpy(via->rtvia_addr, addr, alen); 485 return 0; 486 } 487 488 int nla_put_labels(struct sk_buff *skb, int attrtype, 489 u8 labels, const u32 label[]) 490 { 491 struct nlattr *nla; 492 struct mpls_shim_hdr *nla_label; 493 bool bos; 494 int i; 495 nla = nla_reserve(skb, attrtype, labels*4); 496 if (!nla) 497 return -EMSGSIZE; 498 499 nla_label = nla_data(nla); 500 bos = true; 501 for (i = labels - 1; i >= 0; i--) { 502 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos); 503 bos = false; 504 } 505 506 return 0; 507 } 508 509 int nla_get_labels(const struct nlattr *nla, 510 u32 max_labels, u32 *labels, u32 label[]) 511 { 512 unsigned len = nla_len(nla); 513 unsigned nla_labels; 514 struct mpls_shim_hdr *nla_label; 515 bool bos; 516 int i; 517 518 /* len needs to be an even multiple of 4 (the label size) */ 519 if (len & 3) 520 return -EINVAL; 521 522 /* Limit the number of new labels allowed */ 523 nla_labels = len/4; 524 if (nla_labels > max_labels) 525 return -EINVAL; 526 527 nla_label = nla_data(nla); 528 bos = true; 529 for (i = nla_labels - 1; i >= 0; i--, bos = false) { 530 struct mpls_entry_decoded dec; 531 dec = mpls_entry_decode(nla_label + i); 532 533 /* Ensure the bottom of stack flag is properly set 534 * and ttl and tc are both clear. 535 */ 536 if ((dec.bos != bos) || dec.ttl || dec.tc) 537 return -EINVAL; 538 539 label[i] = dec.label; 540 } 541 *labels = nla_labels; 542 return 0; 543 } 544 545 static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, 546 struct mpls_route_config *cfg) 547 { 548 struct rtmsg *rtm; 549 struct nlattr *tb[RTA_MAX+1]; 550 int index; 551 int err; 552 553 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); 554 if (err < 0) 555 goto errout; 556 557 err = -EINVAL; 558 rtm = nlmsg_data(nlh); 559 memset(cfg, 0, sizeof(*cfg)); 560 561 if (rtm->rtm_family != AF_MPLS) 562 goto errout; 563 if (rtm->rtm_dst_len != 20) 564 goto errout; 565 if (rtm->rtm_src_len != 0) 566 goto errout; 567 if (rtm->rtm_tos != 0) 568 goto errout; 569 if (rtm->rtm_table != RT_TABLE_MAIN) 570 goto errout; 571 /* Any value is acceptable for rtm_protocol */ 572 573 /* As mpls uses destination specific addresses 574 * (or source specific address in the case of multicast) 575 * all addresses have universal scope. 576 */ 577 if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) 578 goto errout; 579 if (rtm->rtm_type != RTN_UNICAST) 580 goto errout; 581 if (rtm->rtm_flags != 0) 582 goto errout; 583 584 cfg->rc_label = LABEL_NOT_SPECIFIED; 585 cfg->rc_protocol = rtm->rtm_protocol; 586 cfg->rc_nlflags = nlh->nlmsg_flags; 587 cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; 588 cfg->rc_nlinfo.nlh = nlh; 589 cfg->rc_nlinfo.nl_net = sock_net(skb->sk); 590 591 for (index = 0; index <= RTA_MAX; index++) { 592 struct nlattr *nla = tb[index]; 593 if (!nla) 594 continue; 595 596 switch(index) { 597 case RTA_OIF: 598 cfg->rc_ifindex = nla_get_u32(nla); 599 break; 600 case RTA_NEWDST: 601 if (nla_get_labels(nla, MAX_NEW_LABELS, 602 &cfg->rc_output_labels, 603 cfg->rc_output_label)) 604 goto errout; 605 break; 606 case RTA_DST: 607 { 608 u32 label_count; 609 if (nla_get_labels(nla, 1, &label_count, 610 &cfg->rc_label)) 611 goto errout; 612 613 /* The first 16 labels are reserved, and may not be set */ 614 if (cfg->rc_label < 16) 615 goto errout; 616 617 break; 618 } 619 case RTA_VIA: 620 { 621 struct rtvia *via = nla_data(nla); 622 if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) 623 goto errout; 624 cfg->rc_via_alen = nla_len(nla) - 625 offsetof(struct rtvia, rtvia_addr); 626 if (cfg->rc_via_alen > MAX_VIA_ALEN) 627 goto errout; 628 629 /* Validate the address family */ 630 switch(via->rtvia_family) { 631 case AF_PACKET: 632 cfg->rc_via_table = NEIGH_LINK_TABLE; 633 break; 634 case AF_INET: 635 cfg->rc_via_table = NEIGH_ARP_TABLE; 636 if (cfg->rc_via_alen != 4) 637 goto errout; 638 break; 639 case AF_INET6: 640 cfg->rc_via_table = NEIGH_ND_TABLE; 641 if (cfg->rc_via_alen != 16) 642 goto errout; 643 break; 644 default: 645 /* Unsupported address family */ 646 goto errout; 647 } 648 649 memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); 650 break; 651 } 652 default: 653 /* Unsupported attribute */ 654 goto errout; 655 } 656 } 657 658 err = 0; 659 errout: 660 return err; 661 } 662 663 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) 664 { 665 struct mpls_route_config cfg; 666 int err; 667 668 err = rtm_to_route_config(skb, nlh, &cfg); 669 if (err < 0) 670 return err; 671 672 return mpls_route_del(&cfg); 673 } 674 675 676 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 677 { 678 struct mpls_route_config cfg; 679 int err; 680 681 err = rtm_to_route_config(skb, nlh, &cfg); 682 if (err < 0) 683 return err; 684 685 return mpls_route_add(&cfg); 686 } 687 688 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, 689 u32 label, struct mpls_route *rt, int flags) 690 { 691 struct net_device *dev; 692 struct nlmsghdr *nlh; 693 struct rtmsg *rtm; 694 695 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 696 if (nlh == NULL) 697 return -EMSGSIZE; 698 699 rtm = nlmsg_data(nlh); 700 rtm->rtm_family = AF_MPLS; 701 rtm->rtm_dst_len = 20; 702 rtm->rtm_src_len = 0; 703 rtm->rtm_tos = 0; 704 rtm->rtm_table = RT_TABLE_MAIN; 705 rtm->rtm_protocol = rt->rt_protocol; 706 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 707 rtm->rtm_type = RTN_UNICAST; 708 rtm->rtm_flags = 0; 709 710 if (rt->rt_labels && 711 nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) 712 goto nla_put_failure; 713 if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) 714 goto nla_put_failure; 715 dev = rtnl_dereference(rt->rt_dev); 716 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) 717 goto nla_put_failure; 718 if (nla_put_labels(skb, RTA_DST, 1, &label)) 719 goto nla_put_failure; 720 721 nlmsg_end(skb, nlh); 722 return 0; 723 724 nla_put_failure: 725 nlmsg_cancel(skb, nlh); 726 return -EMSGSIZE; 727 } 728 729 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) 730 { 731 struct net *net = sock_net(skb->sk); 732 struct mpls_route __rcu **platform_label; 733 size_t platform_labels; 734 unsigned int index; 735 736 ASSERT_RTNL(); 737 738 index = cb->args[0]; 739 if (index < 16) 740 index = 16; 741 742 platform_label = rtnl_dereference(net->mpls.platform_label); 743 platform_labels = net->mpls.platform_labels; 744 for (; index < platform_labels; index++) { 745 struct mpls_route *rt; 746 rt = rtnl_dereference(platform_label[index]); 747 if (!rt) 748 continue; 749 750 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, 751 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 752 index, rt, NLM_F_MULTI) < 0) 753 break; 754 } 755 cb->args[0] = index; 756 757 return skb->len; 758 } 759 760 static inline size_t lfib_nlmsg_size(struct mpls_route *rt) 761 { 762 size_t payload = 763 NLMSG_ALIGN(sizeof(struct rtmsg)) 764 + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ 765 + nla_total_size(4); /* RTA_DST */ 766 if (rt->rt_labels) /* RTA_NEWDST */ 767 payload += nla_total_size(rt->rt_labels * 4); 768 if (rt->rt_dev) /* RTA_OIF */ 769 payload += nla_total_size(4); 770 return payload; 771 } 772 773 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 774 struct nlmsghdr *nlh, struct net *net, u32 portid, 775 unsigned int nlm_flags) 776 { 777 struct sk_buff *skb; 778 u32 seq = nlh ? nlh->nlmsg_seq : 0; 779 int err = -ENOBUFS; 780 781 skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); 782 if (skb == NULL) 783 goto errout; 784 785 err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags); 786 if (err < 0) { 787 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ 788 WARN_ON(err == -EMSGSIZE); 789 kfree_skb(skb); 790 goto errout; 791 } 792 rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL); 793 794 return; 795 errout: 796 if (err < 0) 797 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); 798 } 799 800 static int resize_platform_label_table(struct net *net, size_t limit) 801 { 802 size_t size = sizeof(struct mpls_route *) * limit; 803 size_t old_limit; 804 size_t cp_size; 805 struct mpls_route __rcu **labels = NULL, **old; 806 struct mpls_route *rt0 = NULL, *rt2 = NULL; 807 unsigned index; 808 809 if (size) { 810 labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); 811 if (!labels) 812 labels = vzalloc(size); 813 814 if (!labels) 815 goto nolabels; 816 } 817 818 /* In case the predefined labels need to be populated */ 819 if (limit > LABEL_IPV4_EXPLICIT_NULL) { 820 struct net_device *lo = net->loopback_dev; 821 rt0 = mpls_rt_alloc(lo->addr_len); 822 if (!rt0) 823 goto nort0; 824 RCU_INIT_POINTER(rt0->rt_dev, lo); 825 rt0->rt_protocol = RTPROT_KERNEL; 826 rt0->rt_via_table = NEIGH_LINK_TABLE; 827 memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); 828 } 829 if (limit > LABEL_IPV6_EXPLICIT_NULL) { 830 struct net_device *lo = net->loopback_dev; 831 rt2 = mpls_rt_alloc(lo->addr_len); 832 if (!rt2) 833 goto nort2; 834 RCU_INIT_POINTER(rt2->rt_dev, lo); 835 rt2->rt_protocol = RTPROT_KERNEL; 836 rt2->rt_via_table = NEIGH_LINK_TABLE; 837 memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); 838 } 839 840 rtnl_lock(); 841 /* Remember the original table */ 842 old = rtnl_dereference(net->mpls.platform_label); 843 old_limit = net->mpls.platform_labels; 844 845 /* Free any labels beyond the new table */ 846 for (index = limit; index < old_limit; index++) 847 mpls_route_update(net, index, NULL, NULL, NULL); 848 849 /* Copy over the old labels */ 850 cp_size = size; 851 if (old_limit < limit) 852 cp_size = old_limit * sizeof(struct mpls_route *); 853 854 memcpy(labels, old, cp_size); 855 856 /* If needed set the predefined labels */ 857 if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && 858 (limit > LABEL_IPV6_EXPLICIT_NULL)) { 859 RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); 860 rt2 = NULL; 861 } 862 863 if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && 864 (limit > LABEL_IPV4_EXPLICIT_NULL)) { 865 RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); 866 rt0 = NULL; 867 } 868 869 /* Update the global pointers */ 870 net->mpls.platform_labels = limit; 871 rcu_assign_pointer(net->mpls.platform_label, labels); 872 873 rtnl_unlock(); 874 875 mpls_rt_free(rt2); 876 mpls_rt_free(rt0); 877 878 if (old) { 879 synchronize_rcu(); 880 kvfree(old); 881 } 882 return 0; 883 884 nort2: 885 mpls_rt_free(rt0); 886 nort0: 887 kvfree(labels); 888 nolabels: 889 return -ENOMEM; 890 } 891 892 static int mpls_platform_labels(struct ctl_table *table, int write, 893 void __user *buffer, size_t *lenp, loff_t *ppos) 894 { 895 struct net *net = table->data; 896 int platform_labels = net->mpls.platform_labels; 897 int ret; 898 struct ctl_table tmp = { 899 .procname = table->procname, 900 .data = &platform_labels, 901 .maxlen = sizeof(int), 902 .mode = table->mode, 903 .extra1 = &zero, 904 .extra2 = &label_limit, 905 }; 906 907 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 908 909 if (write && ret == 0) 910 ret = resize_platform_label_table(net, platform_labels); 911 912 return ret; 913 } 914 915 static struct ctl_table mpls_table[] = { 916 { 917 .procname = "platform_labels", 918 .data = NULL, 919 .maxlen = sizeof(int), 920 .mode = 0644, 921 .proc_handler = mpls_platform_labels, 922 }, 923 { } 924 }; 925 926 static int mpls_net_init(struct net *net) 927 { 928 struct ctl_table *table; 929 930 net->mpls.platform_labels = 0; 931 net->mpls.platform_label = NULL; 932 933 table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); 934 if (table == NULL) 935 return -ENOMEM; 936 937 table[0].data = net; 938 net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); 939 if (net->mpls.ctl == NULL) 940 return -ENOMEM; 941 942 return 0; 943 } 944 945 static void mpls_net_exit(struct net *net) 946 { 947 struct mpls_route __rcu **platform_label; 948 size_t platform_labels; 949 struct ctl_table *table; 950 unsigned int index; 951 952 table = net->mpls.ctl->ctl_table_arg; 953 unregister_net_sysctl_table(net->mpls.ctl); 954 kfree(table); 955 956 /* An rcu grace period has passed since there was a device in 957 * the network namespace (and thus the last in flight packet) 958 * left this network namespace. This is because 959 * unregister_netdevice_many and netdev_run_todo has completed 960 * for each network device that was in this network namespace. 961 * 962 * As such no additional rcu synchronization is necessary when 963 * freeing the platform_label table. 964 */ 965 rtnl_lock(); 966 platform_label = rtnl_dereference(net->mpls.platform_label); 967 platform_labels = net->mpls.platform_labels; 968 for (index = 0; index < platform_labels; index++) { 969 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 970 RCU_INIT_POINTER(platform_label[index], NULL); 971 mpls_rt_free(rt); 972 } 973 rtnl_unlock(); 974 975 kvfree(platform_label); 976 } 977 978 static struct pernet_operations mpls_net_ops = { 979 .init = mpls_net_init, 980 .exit = mpls_net_exit, 981 }; 982 983 static int __init mpls_init(void) 984 { 985 int err; 986 987 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4); 988 989 err = register_pernet_subsys(&mpls_net_ops); 990 if (err) 991 goto out; 992 993 err = register_netdevice_notifier(&mpls_dev_notifier); 994 if (err) 995 goto out_unregister_pernet; 996 997 dev_add_pack(&mpls_packet_type); 998 999 rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); 1000 rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); 1001 rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); 1002 err = 0; 1003 out: 1004 return err; 1005 1006 out_unregister_pernet: 1007 unregister_pernet_subsys(&mpls_net_ops); 1008 goto out; 1009 } 1010 module_init(mpls_init); 1011 1012 static void __exit mpls_exit(void) 1013 { 1014 rtnl_unregister_all(PF_MPLS); 1015 dev_remove_pack(&mpls_packet_type); 1016 unregister_netdevice_notifier(&mpls_dev_notifier); 1017 unregister_pernet_subsys(&mpls_net_ops); 1018 } 1019 module_exit(mpls_exit); 1020 1021 MODULE_DESCRIPTION("MultiProtocol Label Switching"); 1022 MODULE_LICENSE("GPL v2"); 1023 MODULE_ALIAS_NETPROTO(PF_MPLS); 1024