#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/sysctl.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/vmalloc.h>
#include <net/ip.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
#endif
#include <net/nexthop.h>
#include "internal.h"

/* Maximum number of labels to look ahead at when selecting a path of
 * a multipath route
 */
#define MAX_MP_SELECT_LABELS 4

#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)

static int zero = 0;
static int label_limit = (1 << 20) - 1;

static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags);

static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
{
	struct mpls_route *rt = NULL;

	if (index < net->mpls.platform_labels) {
		struct mpls_route __rcu **platform_label =
			rcu_dereference(net->mpls.platform_label);
		rt = rcu_dereference(platform_label[index]);
	}
	return rt;
}

static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
{
	return rcu_dereference_rtnl(dev->mpls_ptr);
}

bool mpls_output_possible(const struct net_device *dev)
{
	return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
}
EXPORT_SYMBOL_GPL(mpls_output_possible);

static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
	u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
	int nh_index = nh - rt->rt_nh;

	return nh0_via + rt->rt_max_alen * nh_index;
}

static const u8 *mpls_nh_via(const struct mpls_route *rt,
			     const struct mpls_nh *nh)
{
	return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
}

static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
	/* The size of the layer 2.5 labels to be added for this route */
	return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}

unsigned int mpls_dev_mtu(const struct net_device *dev)
{
	/* The amount of data the layer 2 frame can hold */
	return dev->mtu;
}
EXPORT_SYMBOL_GPL(mpls_dev_mtu);

bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);

static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
					     struct sk_buff *skb, bool bos)
{
	struct mpls_entry_decoded dec;
	struct mpls_shim_hdr *hdr;
	bool eli_seen = false;
	int label_index;
	int nh_index = 0;
	u32 hash = 0;

	/* No need to look further into packet if there's only
	 * one path
	 */
	if (rt->rt_nhn == 1)
		goto out;

	for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
	     label_index++) {
		if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
			break;

		/* Read and decode the current label */
		hdr = mpls_hdr(skb) + label_index;
		dec = mpls_entry_decode(hdr);

		/* RFC6790 - reserved labels MUST NOT be used as keys
		 * for the load-balancing function
		 */
		if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
			hash = jhash_1word(dec.label, hash);

			/* The entropy label follows the entropy label
			 * indicator, so this means that the entropy
			 * label was just added to the hash - no need to
			 * go any deeper either in the label stack or in the
			 * payload
			 */
			if (eli_seen)
				break;
		} else if (dec.label == MPLS_LABEL_ENTROPY) {
			eli_seen = true;
		}

		bos = dec.bos;
		if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index +
					 sizeof(struct iphdr))) {
			const struct iphdr *v4hdr;

			v4hdr = (const struct iphdr *)(mpls_hdr(skb) +
						       label_index);
			if (v4hdr->version == 4) {
				hash = jhash_3words(ntohl(v4hdr->saddr),
						    ntohl(v4hdr->daddr),
						    v4hdr->protocol, hash);
			} else if (v4hdr->version == 6 &&
				   pskb_may_pull(skb, sizeof(*hdr) * label_index +
						 sizeof(struct ipv6hdr))) {
				const struct ipv6hdr *v6hdr;

				v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) +
								 label_index);

				hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
				hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
				hash = jhash_1word(v6hdr->nexthdr, hash);
			}
		}
	}

	nh_index = hash % rt->rt_nhn;
out:
	return &rt->rt_nh[nh_index];
}

static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
			struct mpls_entry_decoded dec)
{
	enum mpls_payload_type payload_type;
	bool success = false;

	/* The IPv4 code below accesses through the IPv4 header
	 * checksum, which is 12 bytes into the packet.
	 * The IPv6 code below accesses through the IPv6 hop limit
	 * which is 8 bytes into the packet.
	 *
	 * For all supported cases there should always be at least 12
	 * bytes of packet data present.  The IPv4 header is 20 bytes
	 * without options and the IPv6 header is always 40 bytes long.
	 */
	if (!pskb_may_pull(skb, 12))
		return false;

	payload_type = rt->rt_payload_type;
	if (payload_type == MPT_UNSPEC)
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		skb->protocol = htons(ETH_P_IP);
		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(dec.ttl << 8));
		hdr4->ttl = dec.ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
		skb->protocol = htons(ETH_P_IPV6);
		hdr6->hop_limit = dec.ttl;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		break;
	}

	return success;
}

static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mpls_shim_hdr *hdr;
	struct mpls_route *rt;
	struct mpls_nh *nh;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	mdev = mpls_dev_get(dev);
	if (!mdev || !mdev->input_enabled)
		goto drop;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto drop;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto drop;

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt)
		goto drop;

	nh = mpls_select_multipath(rt, skb, dec.bos);
	if (!nh)
		goto drop;

	/* Find the output device */
	out_dev = rcu_dereference(nh->nh_dev);
	if (!mpls_output_possible(out_dev))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	skb_forward_csum(skb);

	/* Verify ttl is valid */
	if (dec.ttl <= 1)
		goto drop;
	dec.ttl -= 1;

	/* Verify the destination can hold the packet */
	new_header_size = mpls_nh_header_size(nh);
	mtu = mpls_dev_mtu(out_dev);
	if (mpls_pkt_too_big(skb, mtu - new_header_size))
		goto drop;

	hh_len = LL_RESERVED_SPACE(out_dev);
	if (!out_dev->header_ops)
		hh_len = 0;

	/* Ensure there is enough space for the headers in the skb */
	if (skb_cow(skb, hh_len + new_header_size))
		goto drop;

	skb->dev = out_dev;
	skb->protocol = htons(ETH_P_MPLS_UC);

	if (unlikely(!new_header_size && dec.bos)) {
		/* Penultimate hop popping */
		if (!mpls_egress(rt, skb, dec))
			goto drop;
	} else {
		bool bos;
		int i;
		skb_push(skb, new_header_size);
		skb_reset_network_header(skb);
		/* Push the new labels */
		hdr = mpls_hdr(skb);
		bos = dec.bos;
		for (i = nh->nh_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(nh->nh_label[i],
						   dec.ttl, 0, bos);
			bos = false;
		}
	}

	/* If via wasn't specified then send out using device address */
	if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
		err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
				 out_dev->dev_addr, skb);
	else
		err = neigh_xmit(nh->nh_via_table, out_dev,
				 mpls_nh_via(rt, nh), skb);
	if (err)
		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
				    __func__, err);
	return 0;

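	/* Every error path above lands here: the skb is freed and the
	 * caller sees the packet accounted as dropped.
	 */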
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

static struct packet_type mpls_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_MPLS_UC),
	.func = mpls_forward,
};

static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
	[RTA_DST] = { .type = NLA_U32 },
	[RTA_OIF] = { .type = NLA_U32 },
};

struct mpls_route_config {
	u32 rc_protocol;
	u32 rc_ifindex;
	u8 rc_via_table;
	u8 rc_via_alen;
	u8 rc_via[MAX_VIA_ALEN];
	u32 rc_label;
	u8 rc_output_labels;
	u32 rc_output_label[MAX_NEW_LABELS];
	u32 rc_nlflags;
	enum mpls_payload_type rc_payload_type;
	struct nl_info rc_nlinfo;
	struct rtnexthop *rc_mp;
	int rc_mp_len;
};

static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
{
	u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
	struct mpls_route *rt;

	rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
			   VIA_ALEN_ALIGN) +
		     num_nh * max_alen_aligned,
		     GFP_KERNEL);
	if (rt) {
		rt->rt_nhn = num_nh;
		rt->rt_max_alen = max_alen_aligned;
	}

	return rt;
}

static void mpls_rt_free(struct mpls_route *rt)
{
	if (rt)
		kfree_rcu(rt, rt_rcu);
}

static void mpls_notify_route(struct net *net, unsigned index,
			      struct mpls_route *old, struct mpls_route *new,
			      const struct nl_info *info)
{
	struct nlmsghdr *nlh = info ? info->nlh : NULL;
	unsigned portid = info ? info->portid : 0;
	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
	struct mpls_route *rt = new ? new : old;
	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
	/* Ignore reserved labels for now */
	if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}

static void mpls_route_update(struct net *net, unsigned index,
			      struct mpls_route *new,
			      const struct nl_info *info)
{
	struct mpls_route __rcu **platform_label;
	struct mpls_route *rt;

	ASSERT_RTNL();

	platform_label = rtnl_dereference(net->mpls.platform_label);
	rt = rtnl_dereference(platform_label[index]);
	rcu_assign_pointer(platform_label[index], new);

	mpls_notify_route(net, index, rt, new, info);

	/* If we removed a route free it now */
	mpls_rt_free(rt);
}

static unsigned find_free_label(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	unsigned index;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;
	for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels;
	     index++) {
		if (!rtnl_dereference(platform_label[index]))
			return index;
	}
	return LABEL_NOT_SPECIFIED;
}

#if IS_ENABLED(CONFIG_INET)
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      const void *addr)
{
	struct net_device *dev;
	struct rtable *rt;
	struct in_addr daddr;

	memcpy(&daddr, addr, sizeof(struct in_addr));
	rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
	if (IS_ERR(rt))
		return ERR_CAST(rt);

	dev = rt->dst.dev;
	dev_hold(dev);

	ip_rt_put(rt);

	return dev;
}
#else
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif

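/* IPv6 counterpart of inet_fib_lookup_dev(): when no output interface was
 * given, resolve the device to transmit on from a route lookup on the IPv6
 * via address.  Without IPv6 support this is a stub returning -EAFNOSUPPORT.
 */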
#if IS_ENABLED(CONFIG_IPV6)
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       const void *addr)
{
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int err;

	if (!ipv6_stub)
		return ERR_PTR(-EAFNOSUPPORT);

	memset(&fl6, 0, sizeof(fl6));
	memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
	err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
	if (err)
		return ERR_PTR(err);

	dev = dst->dev;
	dev_hold(dev);
	dst_release(dst);

	return dev;
}
#else
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif

static struct net_device *find_outdev(struct net *net,
				      struct mpls_route *rt,
				      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;

	if (!oif) {
		switch (nh->nh_via_table) {
		case NEIGH_ARP_TABLE:
			dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh));
			break;
		case NEIGH_ND_TABLE:
			dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh));
			break;
		case NEIGH_LINK_TABLE:
			break;
		}
	} else {
		dev = dev_get_by_index(net, oif);
	}

	if (!dev)
		return ERR_PTR(-ENODEV);

	/* The caller is holding rtnl anyways, so release the dev reference */
	dev_put(dev);

	return dev;
}

static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
			      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;
	int err = -ENODEV;

	dev = find_outdev(net, rt, nh, oif);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		dev = NULL;
		goto errout;
	}

	/* Ensure this is a supported device */
	err = -EINVAL;
	if (!mpls_dev_get(dev))
		goto errout;

	if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
	    (dev->addr_len != nh->nh_via_alen))
		goto errout;

	RCU_INIT_POINTER(nh->nh_dev, dev);

	return 0;

errout:
	return err;
}

static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
				  struct mpls_route *rt)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_nh *nh = rt->rt_nh;
	int err;
	int i;

	if (!nh)
		return -ENOMEM;

	err = -EINVAL;
	/* Ensure only a supported number of labels are present */
	if (cfg->rc_output_labels > MAX_NEW_LABELS)
		goto errout;

	nh->nh_labels = cfg->rc_output_labels;
	for (i = 0; i < nh->nh_labels; i++)
		nh->nh_label[i] = cfg->rc_output_label[i];

	nh->nh_via_table = cfg->rc_via_table;
	memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
	nh->nh_via_alen = cfg->rc_via_alen;

	err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
	if (err)
		goto errout;

	return 0;

errout:
	return err;
}

static int mpls_nh_build(struct net *net, struct mpls_route *rt,
			 struct mpls_nh *nh, int oif,
			 struct nlattr *via, struct nlattr *newdst)
{
	int err = -ENOMEM;

	if (!nh)
		goto errout;

	if (newdst) {
		err = nla_get_labels(newdst, MAX_NEW_LABELS,
				     &nh->nh_labels, nh->nh_label);
		if (err)
			goto errout;
	}

	if (via) {
		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
				  __mpls_nh_via(rt, nh));
		if (err)
			goto errout;
	} else {
		nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	}

	err = mpls_nh_assign_dev(net, rt, nh, oif);
	if (err)
		goto errout;

	return 0;

errout:
	return err;
}

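/* Count the nexthops described by an RTA_MULTIPATH payload and work out the
 * largest via address that has to be stored with them.  Without an
 * RTA_MULTIPATH attribute this is a single nexthop route, so return 1 and
 * take the via length from the top-level config.  A trailing partial entry
 * means the nexthop list is malformed and 0 is returned.
 */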
static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
			       u8 cfg_via_alen, u8 *max_via_alen)
{
	int nhs = 0;
	int remaining = len;

	if (!rtnh) {
		*max_via_alen = cfg_via_alen;
		return 1;
	}

	*max_via_alen = 0;

	while (rtnh_ok(rtnh, remaining)) {
		struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
		int attrlen;

		attrlen = rtnh_attrlen(rtnh);
		nla = nla_find(attrs, attrlen, RTA_VIA);
		if (nla && nla_len(nla) >=
		    offsetof(struct rtvia, rtvia_addr)) {
			int via_alen = nla_len(nla) -
				offsetof(struct rtvia, rtvia_addr);

			if (via_alen <= MAX_VIA_ALEN)
				*max_via_alen = max_t(u16, *max_via_alen,
						      via_alen);
		}

		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}

static int mpls_nh_build_multi(struct mpls_route_config *cfg,
			       struct mpls_route *rt)
{
	struct rtnexthop *rtnh = cfg->rc_mp;
	struct nlattr *nla_via, *nla_newdst;
	int remaining = cfg->rc_mp_len;
	int nhs = 0;
	int err = 0;

	change_nexthops(rt) {
		int attrlen;

		nla_via = NULL;
		nla_newdst = NULL;

		err = -EINVAL;
		if (!rtnh_ok(rtnh, remaining))
			goto errout;

		/* neither weighted multipath nor any flags
		 * are supported
		 */
		if (rtnh->rtnh_hops || rtnh->rtnh_flags)
			goto errout;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *attrs = rtnh_attrs(rtnh);

			nla_via = nla_find(attrs, attrlen, RTA_VIA);
			nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
		}

		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
				    rtnh->rtnh_ifindex, nla_via,
				    nla_newdst);
		if (err)
			goto errout;

		rtnh = rtnh_next(rtnh, &remaining);
		nhs++;
	} endfor_nexthops(rt);

	rt->rt_nhn = nhs;

	return 0;

errout:
	return err;
}

static int mpls_route_add(struct mpls_route_config *cfg)
{
	struct mpls_route __rcu **platform_label;
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_route *rt, *old;
	int err = -EINVAL;
	u8 max_via_alen;
	unsigned index;
	int nhs;

	index = cfg->rc_label;

	/* If a label was not specified during insert pick one */
	if ((index == LABEL_NOT_SPECIFIED) &&
	    (cfg->rc_nlflags & NLM_F_CREATE)) {
		index = find_free_label(net);
	}

	/* Reserved labels may not be set */
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		goto errout;

	/* The full 20 bit range may not be supported. */
	if (index >= net->mpls.platform_labels)
		goto errout;

	/* Append makes no sense with mpls */
	err = -EOPNOTSUPP;
	if (cfg->rc_nlflags & NLM_F_APPEND)
		goto errout;

	err = -EEXIST;
	platform_label = rtnl_dereference(net->mpls.platform_label);
	old = rtnl_dereference(platform_label[index]);
	if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
		goto errout;

	err = -EEXIST;
	if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
		goto errout;

	err = -ENOENT;
	if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
		goto errout;

	err = -EINVAL;
	nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
				  cfg->rc_via_alen, &max_via_alen);
	if (nhs == 0)
		goto errout;

	err = -ENOMEM;
	rt = mpls_rt_alloc(nhs, max_via_alen);
	if (!rt)
		goto errout;

	rt->rt_protocol = cfg->rc_protocol;
	rt->rt_payload_type = cfg->rc_payload_type;

	if (cfg->rc_mp)
		err = mpls_nh_build_multi(cfg, rt);
	else
		err = mpls_nh_build_from_cfg(cfg, rt);
	if (err)
		goto freert;

	mpls_route_update(net, index, rt, &cfg->rc_nlinfo);

	return 0;

freert:
	mpls_rt_free(rt);
errout:
	return err;
}

static int mpls_route_del(struct mpls_route_config *cfg)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	unsigned index;
	int err = -EINVAL;

	index = cfg->rc_label;

	/* Reserved labels may not be removed */
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		goto errout;

	/* The full 20 bit range may not be supported */
	if (index >= net->mpls.platform_labels)
		goto errout;

	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);

	err = 0;
errout:
	return err;
}

#define MPLS_PERDEV_SYSCTL_OFFSET(field)	\
	(&((struct mpls_dev *)0)->field)

static const struct ctl_table mpls_dev_table[] = {
	{
		.procname = "input",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
		.data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
	},
	{ }
};

static int mpls_dev_sysctl_register(struct net_device *dev,
				    struct mpls_dev *mdev)
{
	char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
	struct ctl_table *table;
	int i;

	table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
	if (!table)
		goto out;

	/* Table data contains only offsets relative to the base of
	 * the mdev at this point, so make them absolute.
	 */
	for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++)
		table[i].data = (char *)mdev + (uintptr_t)table[i].data;

	snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);

	mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
	if (!mdev->sysctl)
		goto free;

	return 0;

free:
	kfree(table);
out:
	return -ENOBUFS;
}

static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
{
	struct ctl_table *table;

	table = mdev->sysctl->ctl_table_arg;
	unregister_net_sysctl_table(mdev->sysctl);
	kfree(table);
}

static struct mpls_dev *mpls_add_dev(struct net_device *dev)
{
	struct mpls_dev *mdev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return ERR_PTR(err);

	err = mpls_dev_sysctl_register(dev, mdev);
	if (err)
		goto free;

	rcu_assign_pointer(dev->mpls_ptr, mdev);

	return mdev;

free:
	kfree(mdev);
	return ERR_PTR(err);
}

static void mpls_ifdown(struct net_device *dev)
{
	struct mpls_route __rcu **platform_label;
	struct net *net = dev_net(dev);
	struct mpls_dev *mdev;
	unsigned index;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt = rtnl_dereference(platform_label[index]);
		if (!rt)
			continue;
		for_nexthops(rt) {
			if (rtnl_dereference(nh->nh_dev) != dev)
				continue;
			nh->nh_dev = NULL;
		} endfor_nexthops(rt);
	}

	mdev = mpls_dev_get(dev);
	if (!mdev)
		return;

	mpls_dev_sysctl_unregister(mdev);

	RCU_INIT_POINTER(dev->mpls_ptr, NULL);

	kfree_rcu(mdev, rcu);
}

static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct mpls_dev *mdev;

	switch (event) {
	case NETDEV_REGISTER:
		/* For now just support ethernet devices */
		if ((dev->type == ARPHRD_ETHER) ||
		    (dev->type == ARPHRD_LOOPBACK)) {
			mdev = mpls_add_dev(dev);
			if (IS_ERR(mdev))
				return notifier_from_errno(PTR_ERR(mdev));
		}
		break;

	case NETDEV_UNREGISTER:
		mpls_ifdown(dev);
		break;
	case NETDEV_CHANGENAME:
		mdev = mpls_dev_get(dev);
		if (mdev) {
			int err;

			mpls_dev_sysctl_unregister(mdev);
			err = mpls_dev_sysctl_register(dev, mdev);
			if (err)
				return notifier_from_errno(err);
		}
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block mpls_dev_notifier = {
	.notifier_call = mpls_dev_notify,
};

static int nla_put_via(struct sk_buff *skb,
		       u8 table, const void *addr, int alen)
{
	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
		AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
	};
	struct nlattr *nla;
	struct rtvia *via;
	int family = AF_UNSPEC;

	nla = nla_reserve(skb, RTA_VIA, alen + 2);
	if (!nla)
		return -EMSGSIZE;

	if (table <= NEIGH_NR_TABLES)
		family = table_to_family[table];

	via = nla_data(nla);
	via->rtvia_family = family;
	memcpy(via->rtvia_addr, addr, alen);
	return 0;
}

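/* Encode an array of labels as an MPLS label stack attribute.  The labels
 * are written in array order, with the bottom of stack bit set only on the
 * last shim entry and ttl/tc left clear, which is what nla_get_labels()
 * expects when the attribute is read back.
 */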
int nla_put_labels(struct sk_buff *skb, int attrtype,
		   u8 labels, const u32 label[])
{
	struct nlattr *nla;
	struct mpls_shim_hdr *nla_label;
	bool bos;
	int i;

	nla = nla_reserve(skb, attrtype, labels*4);
	if (!nla)
		return -EMSGSIZE;

	nla_label = nla_data(nla);
	bos = true;
	for (i = labels - 1; i >= 0; i--) {
		nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
		bos = false;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nla_put_labels);

int nla_get_labels(const struct nlattr *nla,
		   u32 max_labels, u8 *labels, u32 label[])
{
	unsigned len = nla_len(nla);
	unsigned nla_labels;
	struct mpls_shim_hdr *nla_label;
	bool bos;
	int i;

	/* len needs to be an even multiple of 4 (the label size) */
	if (len & 3)
		return -EINVAL;

	/* Limit the number of new labels allowed */
	nla_labels = len/4;
	if (nla_labels > max_labels)
		return -EINVAL;

	nla_label = nla_data(nla);
	bos = true;
	for (i = nla_labels - 1; i >= 0; i--, bos = false) {
		struct mpls_entry_decoded dec;
		dec = mpls_entry_decode(nla_label + i);

		/* Ensure the bottom of stack flag is properly set
		 * and ttl and tc are both clear.
		 */
		if ((dec.bos != bos) || dec.ttl || dec.tc)
			return -EINVAL;

		switch (dec.label) {
		case MPLS_LABEL_IMPLNULL:
			/* RFC3032: This is a label that an LSR may
			 * assign and distribute, but which never
			 * actually appears in the encapsulation.
			 */
			return -EINVAL;
		}

		label[i] = dec.label;
	}
	*labels = nla_labels;
	return 0;
}
EXPORT_SYMBOL_GPL(nla_get_labels);

int nla_get_via(const struct nlattr *nla, u8 *via_alen,
		u8 *via_table, u8 via_addr[])
{
	struct rtvia *via = nla_data(nla);
	int err = -EINVAL;
	int alen;

	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
		goto errout;
	alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
	if (alen > MAX_VIA_ALEN)
		goto errout;

	/* Validate the address family */
	switch (via->rtvia_family) {
	case AF_PACKET:
		*via_table = NEIGH_LINK_TABLE;
		break;
	case AF_INET:
		*via_table = NEIGH_ARP_TABLE;
		if (alen != 4)
			goto errout;
		break;
	case AF_INET6:
		*via_table = NEIGH_ND_TABLE;
		if (alen != 16)
			goto errout;
		break;
	default:
		/* Unsupported address family */
		goto errout;
	}

	memcpy(via_addr, via->rtvia_addr, alen);
	*via_alen = alen;
	err = 0;

errout:
	return err;
}

static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct mpls_route_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int index;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	if (rtm->rtm_family != AF_MPLS)
		goto errout;
	if (rtm->rtm_dst_len != 20)
		goto errout;
	if (rtm->rtm_src_len != 0)
		goto errout;
	if (rtm->rtm_tos != 0)
		goto errout;
	if (rtm->rtm_table != RT_TABLE_MAIN)
		goto errout;
	/* Any value is acceptable for rtm_protocol */

	/* As mpls uses destination specific addresses
	 * (or source specific address in the case of multicast)
	 * all addresses have universal scope.
	 */
	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
		goto errout;
	if (rtm->rtm_type != RTN_UNICAST)
		goto errout;
	if (rtm->rtm_flags != 0)
		goto errout;

	cfg->rc_label = LABEL_NOT_SPECIFIED;
	cfg->rc_protocol = rtm->rtm_protocol;
	cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	cfg->rc_nlflags = nlh->nlmsg_flags;
	cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->rc_nlinfo.nlh = nlh;
	cfg->rc_nlinfo.nl_net = sock_net(skb->sk);

	for (index = 0; index <= RTA_MAX; index++) {
		struct nlattr *nla = tb[index];
		if (!nla)
			continue;

		switch (index) {
		case RTA_OIF:
			cfg->rc_ifindex = nla_get_u32(nla);
			break;
		case RTA_NEWDST:
			if (nla_get_labels(nla, MAX_NEW_LABELS,
					   &cfg->rc_output_labels,
					   cfg->rc_output_label))
				goto errout;
			break;
		case RTA_DST:
		{
			u8 label_count;
			if (nla_get_labels(nla, 1, &label_count,
					   &cfg->rc_label))
				goto errout;

			/* Reserved labels may not be set */
			if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
				goto errout;

			break;
		}
		case RTA_VIA:
		{
			if (nla_get_via(nla, &cfg->rc_via_alen,
					&cfg->rc_via_table, cfg->rc_via))
				goto errout;
			break;
		}
		case RTA_MULTIPATH:
		{
			cfg->rc_mp = nla_data(nla);
			cfg->rc_mp_len = nla_len(nla);
			break;
		}
		default:
			/* Unsupported attribute */
			goto errout;
		}
	}

	err = 0;
errout:
	return err;
}

static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct mpls_route_config cfg;
	int err;

	err = rtm_to_route_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	return mpls_route_del(&cfg);
}

static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct mpls_route_config cfg;
	int err;

	err = rtm_to_route_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	return mpls_route_add(&cfg);
}

static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
			   u32 label, struct mpls_route *rt, int flags)
{
	struct net_device *dev;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_MPLS;
	rtm->rtm_dst_len = 20;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = rt->rt_protocol;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;

	if (nla_put_labels(skb, RTA_DST, 1, &label))
		goto nla_put_failure;
	if (rt->rt_nhn == 1) {
		const struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_labels &&
		    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
				   nh->nh_label))
			goto nla_put_failure;
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
				nh->nh_via_alen))
			goto nla_put_failure;
		dev = rtnl_dereference(nh->nh_dev);
		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
			goto nla_put_failure;
	} else {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(rt) {
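			/* Reserve a bare struct rtnexthop for this path;
			 * its rtnh_len is filled in below, once the nested
			 * RTA_NEWDST/RTA_VIA attributes have been added.
			 */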
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			dev = rtnl_dereference(nh->nh_dev);
			if (dev)
				rtnh->rtnh_ifindex = dev->ifindex;
			if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
							    nh->nh_labels,
							    nh->nh_label))
				goto nla_put_failure;
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
			    nla_put_via(skb, nh->nh_via_table,
					mpls_nh_via(rt, nh),
					nh->nh_via_alen))
				goto nla_put_failure;

			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		} endfor_nexthops(rt);

		nla_nest_end(skb, mp);
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	unsigned int index;

	ASSERT_RTNL();

	index = cb->args[0];
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		index = MPLS_LABEL_FIRST_UNRESERVED;

	platform_label = rtnl_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;
	for (; index < platform_labels; index++) {
		struct mpls_route *rt;
		rt = rtnl_dereference(platform_label[index]);
		if (!rt)
			continue;

		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
				    index, rt, NLM_F_MULTI) < 0)
			break;
	}
	cb->args[0] = index;

	return skb->len;
}

static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
	size_t payload =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4); /* RTA_DST */

	if (rt->rt_nhn == 1) {
		struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_dev)
			payload += nla_total_size(4); /* RTA_OIF */
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
			payload += nla_total_size(2 + nh->nh_via_alen);
		if (nh->nh_labels) /* RTA_NEWDST */
			payload += nla_total_size(nh->nh_labels * 4);
	} else {
		/* each nexthop is packed in an attribute */
		size_t nhsize = 0;

		for_nexthops(rt) {
			nhsize += nla_total_size(sizeof(struct rtnexthop));
			/* RTA_VIA */
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
				nhsize += nla_total_size(2 + nh->nh_via_alen);
			if (nh->nh_labels)
				nhsize += nla_total_size(nh->nh_labels * 4);
		} endfor_nexthops(rt);
		/* nested attribute */
		payload += nla_total_size(nhsize);
	}

	return payload;
}

static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);

	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}

static int resize_platform_label_table(struct net *net, size_t limit)
{
	size_t size = sizeof(struct mpls_route *) * limit;
	size_t old_limit;
	size_t cp_size;
	struct mpls_route __rcu **labels = NULL, **old;
	struct mpls_route *rt0 = NULL, *rt2 = NULL;
	unsigned index;

	if (size) {
		labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
		if (!labels)
			labels = vzalloc(size);

		if (!labels)
			goto nolabels;
	}

	/* In case the predefined labels need to be populated */
	if (limit > MPLS_LABEL_IPV4NULL) {
		struct net_device *lo = net->loopback_dev;
		rt0 = mpls_rt_alloc(1, lo->addr_len);
		if (!rt0)
			goto nort0;
		RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
		rt0->rt_protocol = RTPROT_KERNEL;
		rt0->rt_payload_type = MPT_IPV4;
		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt0->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}
	if (limit > MPLS_LABEL_IPV6NULL) {
		struct net_device *lo = net->loopback_dev;
		rt2 = mpls_rt_alloc(1, lo->addr_len);
		if (!rt2)
			goto nort2;
		RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
		rt2->rt_protocol = RTPROT_KERNEL;
		rt2->rt_payload_type = MPT_IPV6;
		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt2->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}

	rtnl_lock();
	/* Remember the original table */
	old = rtnl_dereference(net->mpls.platform_label);
	old_limit = net->mpls.platform_labels;

	/* Free any labels beyond the new table */
	for (index = limit; index < old_limit; index++)
		mpls_route_update(net, index, NULL, NULL);

	/* Copy over the old labels */
	cp_size = size;
	if (old_limit < limit)
		cp_size = old_limit * sizeof(struct mpls_route *);

	memcpy(labels, old, cp_size);

	/* If needed set the predefined labels */
	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
	    (limit > MPLS_LABEL_IPV6NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
		rt2 = NULL;
	}

	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
	    (limit > MPLS_LABEL_IPV4NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
		rt0 = NULL;
	}

	/* Update the global pointers */
	net->mpls.platform_labels = limit;
	rcu_assign_pointer(net->mpls.platform_label, labels);

	rtnl_unlock();

	mpls_rt_free(rt2);
	mpls_rt_free(rt0);

	if (old) {
		synchronize_rcu();
		kvfree(old);
	}
	return 0;

nort2:
	mpls_rt_free(rt0);
nort0:
	kvfree(labels);
nolabels:
	return -ENOMEM;
}

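/* sysctl handler for net.mpls.platform_labels.  Reads report the current
 * table size; writes resize the platform label table through
 * resize_platform_label_table(), within the [0, label_limit] range enforced
 * by proc_dointvec_minmax().
 */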
static int mpls_platform_labels(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = table->data;
	int platform_labels = net->mpls.platform_labels;
	int ret;
	struct ctl_table tmp = {
		.procname = table->procname,
		.data = &platform_labels,
		.maxlen = sizeof(int),
		.mode = table->mode,
		.extra1 = &zero,
		.extra2 = &label_limit,
	};

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0)
		ret = resize_platform_label_table(net, platform_labels);

	return ret;
}

static const struct ctl_table mpls_table[] = {
	{
		.procname = "platform_labels",
		.data = NULL,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = mpls_platform_labels,
	},
	{ }
};

static int mpls_net_init(struct net *net)
{
	struct ctl_table *table;

	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	table[0].data = net;
	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}

static void mpls_net_exit(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	struct ctl_table *table;
	unsigned int index;

	table = net->mpls.ctl->ctl_table_arg;
	unregister_net_sysctl_table(net->mpls.ctl);
	kfree(table);

	/* An rcu grace period has passed since the last device in the
	 * network namespace (and thus the last in-flight packet) left
	 * this network namespace.  This is because
	 * unregister_netdevice_many and netdev_run_todo have completed
	 * for each network device that was in this network namespace.
	 *
	 * As such no additional rcu synchronization is necessary when
	 * freeing the platform_label table.
	 */
	rtnl_lock();
	platform_label = rtnl_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;
	for (index = 0; index < platform_labels; index++) {
		struct mpls_route *rt = rtnl_dereference(platform_label[index]);
		RCU_INIT_POINTER(platform_label[index], NULL);
		mpls_rt_free(rt);
	}
	rtnl_unlock();

	kvfree(platform_label);
}

static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};

static int __init mpls_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);

	err = register_pernet_subsys(&mpls_net_ops);
	if (err)
		goto out;

	err = register_netdevice_notifier(&mpls_dev_notifier);
	if (err)
		goto out_unregister_pernet;

	dev_add_pack(&mpls_packet_type);

	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
	rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
	err = 0;
out:
	return err;

out_unregister_pernet:
	unregister_pernet_subsys(&mpls_net_ops);
	goto out;
}
module_init(mpls_init);

static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
}
module_exit(mpls_exit);

MODULE_DESCRIPTION("MultiProtocol Label Switching");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_NETPROTO(PF_MPLS);