1 #include <linux/types.h> 2 #include <linux/skbuff.h> 3 #include <linux/socket.h> 4 #include <linux/sysctl.h> 5 #include <linux/net.h> 6 #include <linux/module.h> 7 #include <linux/if_arp.h> 8 #include <linux/ipv6.h> 9 #include <linux/mpls.h> 10 #include <linux/vmalloc.h> 11 #include <net/ip.h> 12 #include <net/dst.h> 13 #include <net/sock.h> 14 #include <net/arp.h> 15 #include <net/ip_fib.h> 16 #include <net/netevent.h> 17 #include <net/netns/generic.h> 18 #if IS_ENABLED(CONFIG_IPV6) 19 #include <net/ipv6.h> 20 #include <net/addrconf.h> 21 #endif 22 #include <net/nexthop.h> 23 #include "internal.h" 24 25 /* Maximum number of labels to look ahead at when selecting a path of 26 * a multipath route 27 */ 28 #define MAX_MP_SELECT_LABELS 4 29 30 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) 31 32 static int zero = 0; 33 static int label_limit = (1 << 20) - 1; 34 35 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 36 struct nlmsghdr *nlh, struct net *net, u32 portid, 37 unsigned int nlm_flags); 38 39 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) 40 { 41 struct mpls_route *rt = NULL; 42 43 if (index < net->mpls.platform_labels) { 44 struct mpls_route __rcu **platform_label = 45 rcu_dereference(net->mpls.platform_label); 46 rt = rcu_dereference(platform_label[index]); 47 } 48 return rt; 49 } 50 51 static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) 52 { 53 return rcu_dereference_rtnl(dev->mpls_ptr); 54 } 55 56 bool mpls_output_possible(const struct net_device *dev) 57 { 58 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); 59 } 60 EXPORT_SYMBOL_GPL(mpls_output_possible); 61 62 static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) 63 { 64 u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); 65 int nh_index = nh - rt->rt_nh; 66 67 return nh0_via + rt->rt_max_alen * nh_index; 68 } 69 70 static const u8 *mpls_nh_via(const struct mpls_route *rt, 71 const struct mpls_nh *nh) 72 { 73 return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh); 74 } 75 76 static unsigned int mpls_nh_header_size(const struct mpls_nh *nh) 77 { 78 /* The size of the layer 2.5 labels to be added for this route */ 79 return nh->nh_labels * sizeof(struct mpls_shim_hdr); 80 } 81 82 unsigned int mpls_dev_mtu(const struct net_device *dev) 83 { 84 /* The amount of data the layer 2 frame can hold */ 85 return dev->mtu; 86 } 87 EXPORT_SYMBOL_GPL(mpls_dev_mtu); 88 89 bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 90 { 91 if (skb->len <= mtu) 92 return false; 93 94 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 95 return false; 96 97 return true; 98 } 99 EXPORT_SYMBOL_GPL(mpls_pkt_too_big); 100 101 static u32 mpls_multipath_hash(struct mpls_route *rt, 102 struct sk_buff *skb, bool bos) 103 { 104 struct mpls_entry_decoded dec; 105 struct mpls_shim_hdr *hdr; 106 bool eli_seen = false; 107 int label_index; 108 u32 hash = 0; 109 110 for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; 111 label_index++) { 112 if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) 113 break; 114 115 /* Read and decode the current label */ 116 hdr = mpls_hdr(skb) + label_index; 117 dec = mpls_entry_decode(hdr); 118 119 /* RFC6790 - reserved labels MUST NOT be used as keys 120 * for the load-balancing function 121 */ 122 if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) { 123 hash = jhash_1word(dec.label, hash); 124 125 /* The entropy label follows the entropy label 126 * indicator, so this means that the entropy 127 * label was just added to the hash - no need to 128 * go any deeper either in the label stack or in the 129 * payload 130 */ 131 if (eli_seen) 132 break; 133 } else if (dec.label == MPLS_LABEL_ENTROPY) { 134 eli_seen = true; 135 } 136 137 bos = dec.bos; 138 if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + 139 sizeof(struct iphdr))) { 140 const struct iphdr *v4hdr; 141 142 v4hdr = (const struct iphdr *)(mpls_hdr(skb) + 143 label_index); 144 if (v4hdr->version == 4) { 145 hash = jhash_3words(ntohl(v4hdr->saddr), 146 ntohl(v4hdr->daddr), 147 v4hdr->protocol, hash); 148 } else if (v4hdr->version == 6 && 149 pskb_may_pull(skb, sizeof(*hdr) * label_index + 150 sizeof(struct ipv6hdr))) { 151 const struct ipv6hdr *v6hdr; 152 153 v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + 154 label_index); 155 156 hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); 157 hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); 158 hash = jhash_1word(v6hdr->nexthdr, hash); 159 } 160 } 161 } 162 163 return hash; 164 } 165 166 static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, 167 struct sk_buff *skb, bool bos) 168 { 169 int alive = ACCESS_ONCE(rt->rt_nhn_alive); 170 u32 hash = 0; 171 int nh_index = 0; 172 int n = 0; 173 174 /* No need to look further into packet if there's only 175 * one path 176 */ 177 if (rt->rt_nhn == 1) 178 goto out; 179 180 if (alive <= 0) 181 return NULL; 182 183 hash = mpls_multipath_hash(rt, skb, bos); 184 nh_index = hash % alive; 185 if (alive == rt->rt_nhn) 186 goto out; 187 for_nexthops(rt) { 188 if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) 189 continue; 190 if (n == nh_index) 191 return nh; 192 n++; 193 } endfor_nexthops(rt); 194 195 out: 196 return &rt->rt_nh[nh_index]; 197 } 198 199 static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, 200 struct mpls_entry_decoded dec) 201 { 202 enum mpls_payload_type payload_type; 203 bool success = false; 204 205 /* The IPv4 code below accesses through the IPv4 header 206 * checksum, which is 12 bytes into the packet. 207 * The IPv6 code below accesses through the IPv6 hop limit 208 * which is 8 bytes into the packet. 209 * 210 * For all supported cases there should always be at least 12 211 * bytes of packet data present. The IPv4 header is 20 bytes 212 * without options and the IPv6 header is always 40 bytes 213 * long. 214 */ 215 if (!pskb_may_pull(skb, 12)) 216 return false; 217 218 payload_type = rt->rt_payload_type; 219 if (payload_type == MPT_UNSPEC) 220 payload_type = ip_hdr(skb)->version; 221 222 switch (payload_type) { 223 case MPT_IPV4: { 224 struct iphdr *hdr4 = ip_hdr(skb); 225 skb->protocol = htons(ETH_P_IP); 226 csum_replace2(&hdr4->check, 227 htons(hdr4->ttl << 8), 228 htons(dec.ttl << 8)); 229 hdr4->ttl = dec.ttl; 230 success = true; 231 break; 232 } 233 case MPT_IPV6: { 234 struct ipv6hdr *hdr6 = ipv6_hdr(skb); 235 skb->protocol = htons(ETH_P_IPV6); 236 hdr6->hop_limit = dec.ttl; 237 success = true; 238 break; 239 } 240 case MPT_UNSPEC: 241 break; 242 } 243 244 return success; 245 } 246 247 static int mpls_forward(struct sk_buff *skb, struct net_device *dev, 248 struct packet_type *pt, struct net_device *orig_dev) 249 { 250 struct net *net = dev_net(dev); 251 struct mpls_shim_hdr *hdr; 252 struct mpls_route *rt; 253 struct mpls_nh *nh; 254 struct mpls_entry_decoded dec; 255 struct net_device *out_dev; 256 struct mpls_dev *mdev; 257 unsigned int hh_len; 258 unsigned int new_header_size; 259 unsigned int mtu; 260 int err; 261 262 /* Careful this entire function runs inside of an rcu critical section */ 263 264 mdev = mpls_dev_get(dev); 265 if (!mdev || !mdev->input_enabled) 266 goto drop; 267 268 if (skb->pkt_type != PACKET_HOST) 269 goto drop; 270 271 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 272 goto drop; 273 274 if (!pskb_may_pull(skb, sizeof(*hdr))) 275 goto drop; 276 277 /* Read and decode the label */ 278 hdr = mpls_hdr(skb); 279 dec = mpls_entry_decode(hdr); 280 281 /* Pop the label */ 282 skb_pull(skb, sizeof(*hdr)); 283 skb_reset_network_header(skb); 284 285 skb_orphan(skb); 286 287 rt = mpls_route_input_rcu(net, dec.label); 288 if (!rt) 289 goto drop; 290 291 nh = mpls_select_multipath(rt, skb, dec.bos); 292 if (!nh) 293 goto drop; 294 295 /* Find the output device */ 296 out_dev = rcu_dereference(nh->nh_dev); 297 if (!mpls_output_possible(out_dev)) 298 goto drop; 299 300 if (skb_warn_if_lro(skb)) 301 goto drop; 302 303 skb_forward_csum(skb); 304 305 /* Verify ttl is valid */ 306 if (dec.ttl <= 1) 307 goto drop; 308 dec.ttl -= 1; 309 310 /* Verify the destination can hold the packet */ 311 new_header_size = mpls_nh_header_size(nh); 312 mtu = mpls_dev_mtu(out_dev); 313 if (mpls_pkt_too_big(skb, mtu - new_header_size)) 314 goto drop; 315 316 hh_len = LL_RESERVED_SPACE(out_dev); 317 if (!out_dev->header_ops) 318 hh_len = 0; 319 320 /* Ensure there is enough space for the headers in the skb */ 321 if (skb_cow(skb, hh_len + new_header_size)) 322 goto drop; 323 324 skb->dev = out_dev; 325 skb->protocol = htons(ETH_P_MPLS_UC); 326 327 if (unlikely(!new_header_size && dec.bos)) { 328 /* Penultimate hop popping */ 329 if (!mpls_egress(rt, skb, dec)) 330 goto drop; 331 } else { 332 bool bos; 333 int i; 334 skb_push(skb, new_header_size); 335 skb_reset_network_header(skb); 336 /* Push the new labels */ 337 hdr = mpls_hdr(skb); 338 bos = dec.bos; 339 for (i = nh->nh_labels - 1; i >= 0; i--) { 340 hdr[i] = mpls_entry_encode(nh->nh_label[i], 341 dec.ttl, 0, bos); 342 bos = false; 343 } 344 } 345 346 /* If via wasn't specified then send out using device address */ 347 if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC) 348 err = neigh_xmit(NEIGH_LINK_TABLE, out_dev, 349 out_dev->dev_addr, skb); 350 else 351 err = neigh_xmit(nh->nh_via_table, out_dev, 352 mpls_nh_via(rt, nh), skb); 353 if (err) 354 net_dbg_ratelimited("%s: packet transmission failed: %d\n", 355 __func__, err); 356 return 0; 357 358 drop: 359 kfree_skb(skb); 360 return NET_RX_DROP; 361 } 362 363 static struct packet_type mpls_packet_type __read_mostly = { 364 .type = cpu_to_be16(ETH_P_MPLS_UC), 365 .func = mpls_forward, 366 }; 367 368 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { 369 [RTA_DST] = { .type = NLA_U32 }, 370 [RTA_OIF] = { .type = NLA_U32 }, 371 }; 372 373 struct mpls_route_config { 374 u32 rc_protocol; 375 u32 rc_ifindex; 376 u8 rc_via_table; 377 u8 rc_via_alen; 378 u8 rc_via[MAX_VIA_ALEN]; 379 u32 rc_label; 380 u8 rc_output_labels; 381 u32 rc_output_label[MAX_NEW_LABELS]; 382 u32 rc_nlflags; 383 enum mpls_payload_type rc_payload_type; 384 struct nl_info rc_nlinfo; 385 struct rtnexthop *rc_mp; 386 int rc_mp_len; 387 }; 388 389 static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) 390 { 391 u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); 392 struct mpls_route *rt; 393 394 rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), 395 VIA_ALEN_ALIGN) + 396 num_nh * max_alen_aligned, 397 GFP_KERNEL); 398 if (rt) { 399 rt->rt_nhn = num_nh; 400 rt->rt_nhn_alive = num_nh; 401 rt->rt_max_alen = max_alen_aligned; 402 } 403 404 return rt; 405 } 406 407 static void mpls_rt_free(struct mpls_route *rt) 408 { 409 if (rt) 410 kfree_rcu(rt, rt_rcu); 411 } 412 413 static void mpls_notify_route(struct net *net, unsigned index, 414 struct mpls_route *old, struct mpls_route *new, 415 const struct nl_info *info) 416 { 417 struct nlmsghdr *nlh = info ? info->nlh : NULL; 418 unsigned portid = info ? info->portid : 0; 419 int event = new ? RTM_NEWROUTE : RTM_DELROUTE; 420 struct mpls_route *rt = new ? new : old; 421 unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0; 422 /* Ignore reserved labels for now */ 423 if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED)) 424 rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); 425 } 426 427 static void mpls_route_update(struct net *net, unsigned index, 428 struct mpls_route *new, 429 const struct nl_info *info) 430 { 431 struct mpls_route __rcu **platform_label; 432 struct mpls_route *rt; 433 434 ASSERT_RTNL(); 435 436 platform_label = rtnl_dereference(net->mpls.platform_label); 437 rt = rtnl_dereference(platform_label[index]); 438 rcu_assign_pointer(platform_label[index], new); 439 440 mpls_notify_route(net, index, rt, new, info); 441 442 /* If we removed a route free it now */ 443 mpls_rt_free(rt); 444 } 445 446 static unsigned find_free_label(struct net *net) 447 { 448 struct mpls_route __rcu **platform_label; 449 size_t platform_labels; 450 unsigned index; 451 452 platform_label = rtnl_dereference(net->mpls.platform_label); 453 platform_labels = net->mpls.platform_labels; 454 for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels; 455 index++) { 456 if (!rtnl_dereference(platform_label[index])) 457 return index; 458 } 459 return LABEL_NOT_SPECIFIED; 460 } 461 462 #if IS_ENABLED(CONFIG_INET) 463 static struct net_device *inet_fib_lookup_dev(struct net *net, 464 const void *addr) 465 { 466 struct net_device *dev; 467 struct rtable *rt; 468 struct in_addr daddr; 469 470 memcpy(&daddr, addr, sizeof(struct in_addr)); 471 rt = ip_route_output(net, daddr.s_addr, 0, 0, 0); 472 if (IS_ERR(rt)) 473 return ERR_CAST(rt); 474 475 dev = rt->dst.dev; 476 dev_hold(dev); 477 478 ip_rt_put(rt); 479 480 return dev; 481 } 482 #else 483 static struct net_device *inet_fib_lookup_dev(struct net *net, 484 const void *addr) 485 { 486 return ERR_PTR(-EAFNOSUPPORT); 487 } 488 #endif 489 490 #if IS_ENABLED(CONFIG_IPV6) 491 static struct net_device *inet6_fib_lookup_dev(struct net *net, 492 const void *addr) 493 { 494 struct net_device *dev; 495 struct dst_entry *dst; 496 struct flowi6 fl6; 497 int err; 498 499 if (!ipv6_stub) 500 return ERR_PTR(-EAFNOSUPPORT); 501 502 memset(&fl6, 0, sizeof(fl6)); 503 memcpy(&fl6.daddr, addr, sizeof(struct in6_addr)); 504 err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6); 505 if (err) 506 return ERR_PTR(err); 507 508 dev = dst->dev; 509 dev_hold(dev); 510 dst_release(dst); 511 512 return dev; 513 } 514 #else 515 static struct net_device *inet6_fib_lookup_dev(struct net *net, 516 const void *addr) 517 { 518 return ERR_PTR(-EAFNOSUPPORT); 519 } 520 #endif 521 522 static struct net_device *find_outdev(struct net *net, 523 struct mpls_route *rt, 524 struct mpls_nh *nh, int oif) 525 { 526 struct net_device *dev = NULL; 527 528 if (!oif) { 529 switch (nh->nh_via_table) { 530 case NEIGH_ARP_TABLE: 531 dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh)); 532 break; 533 case NEIGH_ND_TABLE: 534 dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh)); 535 break; 536 case NEIGH_LINK_TABLE: 537 break; 538 } 539 } else { 540 dev = dev_get_by_index(net, oif); 541 } 542 543 if (!dev) 544 return ERR_PTR(-ENODEV); 545 546 /* The caller is holding rtnl anyways, so release the dev reference */ 547 dev_put(dev); 548 549 return dev; 550 } 551 552 static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, 553 struct mpls_nh *nh, int oif) 554 { 555 struct net_device *dev = NULL; 556 int err = -ENODEV; 557 558 dev = find_outdev(net, rt, nh, oif); 559 if (IS_ERR(dev)) { 560 err = PTR_ERR(dev); 561 dev = NULL; 562 goto errout; 563 } 564 565 /* Ensure this is a supported device */ 566 err = -EINVAL; 567 if (!mpls_dev_get(dev)) 568 goto errout; 569 570 if ((nh->nh_via_table == NEIGH_LINK_TABLE) && 571 (dev->addr_len != nh->nh_via_alen)) 572 goto errout; 573 574 RCU_INIT_POINTER(nh->nh_dev, dev); 575 576 if (!(dev->flags & IFF_UP)) { 577 nh->nh_flags |= RTNH_F_DEAD; 578 } else { 579 unsigned int flags; 580 581 flags = dev_get_flags(dev); 582 if (!(flags & (IFF_RUNNING | IFF_LOWER_UP))) 583 nh->nh_flags |= RTNH_F_LINKDOWN; 584 } 585 586 return 0; 587 588 errout: 589 return err; 590 } 591 592 static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, 593 struct mpls_route *rt) 594 { 595 struct net *net = cfg->rc_nlinfo.nl_net; 596 struct mpls_nh *nh = rt->rt_nh; 597 int err; 598 int i; 599 600 if (!nh) 601 return -ENOMEM; 602 603 err = -EINVAL; 604 /* Ensure only a supported number of labels are present */ 605 if (cfg->rc_output_labels > MAX_NEW_LABELS) 606 goto errout; 607 608 nh->nh_labels = cfg->rc_output_labels; 609 for (i = 0; i < nh->nh_labels; i++) 610 nh->nh_label[i] = cfg->rc_output_label[i]; 611 612 nh->nh_via_table = cfg->rc_via_table; 613 memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen); 614 nh->nh_via_alen = cfg->rc_via_alen; 615 616 err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex); 617 if (err) 618 goto errout; 619 620 if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) 621 rt->rt_nhn_alive--; 622 623 return 0; 624 625 errout: 626 return err; 627 } 628 629 static int mpls_nh_build(struct net *net, struct mpls_route *rt, 630 struct mpls_nh *nh, int oif, struct nlattr *via, 631 struct nlattr *newdst) 632 { 633 int err = -ENOMEM; 634 635 if (!nh) 636 goto errout; 637 638 if (newdst) { 639 err = nla_get_labels(newdst, MAX_NEW_LABELS, 640 &nh->nh_labels, nh->nh_label); 641 if (err) 642 goto errout; 643 } 644 645 if (via) { 646 err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, 647 __mpls_nh_via(rt, nh)); 648 if (err) 649 goto errout; 650 } else { 651 nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC; 652 } 653 654 err = mpls_nh_assign_dev(net, rt, nh, oif); 655 if (err) 656 goto errout; 657 658 return 0; 659 660 errout: 661 return err; 662 } 663 664 static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, 665 u8 cfg_via_alen, u8 *max_via_alen) 666 { 667 int nhs = 0; 668 int remaining = len; 669 670 if (!rtnh) { 671 *max_via_alen = cfg_via_alen; 672 return 1; 673 } 674 675 *max_via_alen = 0; 676 677 while (rtnh_ok(rtnh, remaining)) { 678 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 679 int attrlen; 680 681 attrlen = rtnh_attrlen(rtnh); 682 nla = nla_find(attrs, attrlen, RTA_VIA); 683 if (nla && nla_len(nla) >= 684 offsetof(struct rtvia, rtvia_addr)) { 685 int via_alen = nla_len(nla) - 686 offsetof(struct rtvia, rtvia_addr); 687 688 if (via_alen <= MAX_VIA_ALEN) 689 *max_via_alen = max_t(u16, *max_via_alen, 690 via_alen); 691 } 692 693 nhs++; 694 rtnh = rtnh_next(rtnh, &remaining); 695 } 696 697 /* leftover implies invalid nexthop configuration, discard it */ 698 return remaining > 0 ? 0 : nhs; 699 } 700 701 static int mpls_nh_build_multi(struct mpls_route_config *cfg, 702 struct mpls_route *rt) 703 { 704 struct rtnexthop *rtnh = cfg->rc_mp; 705 struct nlattr *nla_via, *nla_newdst; 706 int remaining = cfg->rc_mp_len; 707 int nhs = 0; 708 int err = 0; 709 710 change_nexthops(rt) { 711 int attrlen; 712 713 nla_via = NULL; 714 nla_newdst = NULL; 715 716 err = -EINVAL; 717 if (!rtnh_ok(rtnh, remaining)) 718 goto errout; 719 720 /* neither weighted multipath nor any flags 721 * are supported 722 */ 723 if (rtnh->rtnh_hops || rtnh->rtnh_flags) 724 goto errout; 725 726 attrlen = rtnh_attrlen(rtnh); 727 if (attrlen > 0) { 728 struct nlattr *attrs = rtnh_attrs(rtnh); 729 730 nla_via = nla_find(attrs, attrlen, RTA_VIA); 731 nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST); 732 } 733 734 err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, 735 rtnh->rtnh_ifindex, nla_via, nla_newdst); 736 if (err) 737 goto errout; 738 739 if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) 740 rt->rt_nhn_alive--; 741 742 rtnh = rtnh_next(rtnh, &remaining); 743 nhs++; 744 } endfor_nexthops(rt); 745 746 rt->rt_nhn = nhs; 747 748 return 0; 749 750 errout: 751 return err; 752 } 753 754 static int mpls_route_add(struct mpls_route_config *cfg) 755 { 756 struct mpls_route __rcu **platform_label; 757 struct net *net = cfg->rc_nlinfo.nl_net; 758 struct mpls_route *rt, *old; 759 int err = -EINVAL; 760 u8 max_via_alen; 761 unsigned index; 762 int nhs; 763 764 index = cfg->rc_label; 765 766 /* If a label was not specified during insert pick one */ 767 if ((index == LABEL_NOT_SPECIFIED) && 768 (cfg->rc_nlflags & NLM_F_CREATE)) { 769 index = find_free_label(net); 770 } 771 772 /* Reserved labels may not be set */ 773 if (index < MPLS_LABEL_FIRST_UNRESERVED) 774 goto errout; 775 776 /* The full 20 bit range may not be supported. */ 777 if (index >= net->mpls.platform_labels) 778 goto errout; 779 780 /* Append makes no sense with mpls */ 781 err = -EOPNOTSUPP; 782 if (cfg->rc_nlflags & NLM_F_APPEND) 783 goto errout; 784 785 err = -EEXIST; 786 platform_label = rtnl_dereference(net->mpls.platform_label); 787 old = rtnl_dereference(platform_label[index]); 788 if ((cfg->rc_nlflags & NLM_F_EXCL) && old) 789 goto errout; 790 791 err = -EEXIST; 792 if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old) 793 goto errout; 794 795 err = -ENOENT; 796 if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) 797 goto errout; 798 799 err = -EINVAL; 800 nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, 801 cfg->rc_via_alen, &max_via_alen); 802 if (nhs == 0) 803 goto errout; 804 805 err = -ENOMEM; 806 rt = mpls_rt_alloc(nhs, max_via_alen); 807 if (!rt) 808 goto errout; 809 810 rt->rt_protocol = cfg->rc_protocol; 811 rt->rt_payload_type = cfg->rc_payload_type; 812 813 if (cfg->rc_mp) 814 err = mpls_nh_build_multi(cfg, rt); 815 else 816 err = mpls_nh_build_from_cfg(cfg, rt); 817 if (err) 818 goto freert; 819 820 mpls_route_update(net, index, rt, &cfg->rc_nlinfo); 821 822 return 0; 823 824 freert: 825 mpls_rt_free(rt); 826 errout: 827 return err; 828 } 829 830 static int mpls_route_del(struct mpls_route_config *cfg) 831 { 832 struct net *net = cfg->rc_nlinfo.nl_net; 833 unsigned index; 834 int err = -EINVAL; 835 836 index = cfg->rc_label; 837 838 /* Reserved labels may not be removed */ 839 if (index < MPLS_LABEL_FIRST_UNRESERVED) 840 goto errout; 841 842 /* The full 20 bit range may not be supported */ 843 if (index >= net->mpls.platform_labels) 844 goto errout; 845 846 mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); 847 848 err = 0; 849 errout: 850 return err; 851 } 852 853 #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ 854 (&((struct mpls_dev *)0)->field) 855 856 static const struct ctl_table mpls_dev_table[] = { 857 { 858 .procname = "input", 859 .maxlen = sizeof(int), 860 .mode = 0644, 861 .proc_handler = proc_dointvec, 862 .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), 863 }, 864 { } 865 }; 866 867 static int mpls_dev_sysctl_register(struct net_device *dev, 868 struct mpls_dev *mdev) 869 { 870 char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; 871 struct ctl_table *table; 872 int i; 873 874 table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); 875 if (!table) 876 goto out; 877 878 /* Table data contains only offsets relative to the base of 879 * the mdev at this point, so make them absolute. 880 */ 881 for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) 882 table[i].data = (char *)mdev + (uintptr_t)table[i].data; 883 884 snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); 885 886 mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); 887 if (!mdev->sysctl) 888 goto free; 889 890 return 0; 891 892 free: 893 kfree(table); 894 out: 895 return -ENOBUFS; 896 } 897 898 static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) 899 { 900 struct ctl_table *table; 901 902 table = mdev->sysctl->ctl_table_arg; 903 unregister_net_sysctl_table(mdev->sysctl); 904 kfree(table); 905 } 906 907 static struct mpls_dev *mpls_add_dev(struct net_device *dev) 908 { 909 struct mpls_dev *mdev; 910 int err = -ENOMEM; 911 912 ASSERT_RTNL(); 913 914 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); 915 if (!mdev) 916 return ERR_PTR(err); 917 918 err = mpls_dev_sysctl_register(dev, mdev); 919 if (err) 920 goto free; 921 922 rcu_assign_pointer(dev->mpls_ptr, mdev); 923 924 return mdev; 925 926 free: 927 kfree(mdev); 928 return ERR_PTR(err); 929 } 930 931 static void mpls_ifdown(struct net_device *dev, int event) 932 { 933 struct mpls_route __rcu **platform_label; 934 struct net *net = dev_net(dev); 935 unsigned index; 936 937 platform_label = rtnl_dereference(net->mpls.platform_label); 938 for (index = 0; index < net->mpls.platform_labels; index++) { 939 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 940 941 if (!rt) 942 continue; 943 944 change_nexthops(rt) { 945 if (rtnl_dereference(nh->nh_dev) != dev) 946 continue; 947 switch (event) { 948 case NETDEV_DOWN: 949 case NETDEV_UNREGISTER: 950 nh->nh_flags |= RTNH_F_DEAD; 951 /* fall through */ 952 case NETDEV_CHANGE: 953 nh->nh_flags |= RTNH_F_LINKDOWN; 954 ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; 955 break; 956 } 957 if (event == NETDEV_UNREGISTER) 958 RCU_INIT_POINTER(nh->nh_dev, NULL); 959 } endfor_nexthops(rt); 960 } 961 962 963 return; 964 } 965 966 static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) 967 { 968 struct mpls_route __rcu **platform_label; 969 struct net *net = dev_net(dev); 970 unsigned index; 971 int alive; 972 973 platform_label = rtnl_dereference(net->mpls.platform_label); 974 for (index = 0; index < net->mpls.platform_labels; index++) { 975 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 976 977 if (!rt) 978 continue; 979 980 alive = 0; 981 change_nexthops(rt) { 982 struct net_device *nh_dev = 983 rtnl_dereference(nh->nh_dev); 984 985 if (!(nh->nh_flags & nh_flags)) { 986 alive++; 987 continue; 988 } 989 if (nh_dev != dev) 990 continue; 991 alive++; 992 nh->nh_flags &= ~nh_flags; 993 } endfor_nexthops(rt); 994 995 ACCESS_ONCE(rt->rt_nhn_alive) = alive; 996 } 997 998 return; 999 } 1000 1001 static int mpls_dev_notify(struct notifier_block *this, unsigned long event, 1002 void *ptr) 1003 { 1004 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1005 struct mpls_dev *mdev; 1006 unsigned int flags; 1007 1008 if (event == NETDEV_REGISTER) { 1009 /* For now just support ethernet devices */ 1010 if ((dev->type == ARPHRD_ETHER) || 1011 (dev->type == ARPHRD_LOOPBACK)) { 1012 mdev = mpls_add_dev(dev); 1013 if (IS_ERR(mdev)) 1014 return notifier_from_errno(PTR_ERR(mdev)); 1015 } 1016 return NOTIFY_OK; 1017 } 1018 1019 mdev = mpls_dev_get(dev); 1020 if (!mdev) 1021 return NOTIFY_OK; 1022 1023 switch (event) { 1024 case NETDEV_DOWN: 1025 mpls_ifdown(dev, event); 1026 break; 1027 case NETDEV_UP: 1028 flags = dev_get_flags(dev); 1029 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1030 mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); 1031 else 1032 mpls_ifup(dev, RTNH_F_DEAD); 1033 break; 1034 case NETDEV_CHANGE: 1035 flags = dev_get_flags(dev); 1036 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1037 mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); 1038 else 1039 mpls_ifdown(dev, event); 1040 break; 1041 case NETDEV_UNREGISTER: 1042 mpls_ifdown(dev, event); 1043 mdev = mpls_dev_get(dev); 1044 if (mdev) { 1045 mpls_dev_sysctl_unregister(mdev); 1046 RCU_INIT_POINTER(dev->mpls_ptr, NULL); 1047 kfree_rcu(mdev, rcu); 1048 } 1049 break; 1050 case NETDEV_CHANGENAME: 1051 mdev = mpls_dev_get(dev); 1052 if (mdev) { 1053 int err; 1054 1055 mpls_dev_sysctl_unregister(mdev); 1056 err = mpls_dev_sysctl_register(dev, mdev); 1057 if (err) 1058 return notifier_from_errno(err); 1059 } 1060 break; 1061 } 1062 return NOTIFY_OK; 1063 } 1064 1065 static struct notifier_block mpls_dev_notifier = { 1066 .notifier_call = mpls_dev_notify, 1067 }; 1068 1069 static int nla_put_via(struct sk_buff *skb, 1070 u8 table, const void *addr, int alen) 1071 { 1072 static const int table_to_family[NEIGH_NR_TABLES + 1] = { 1073 AF_INET, AF_INET6, AF_DECnet, AF_PACKET, 1074 }; 1075 struct nlattr *nla; 1076 struct rtvia *via; 1077 int family = AF_UNSPEC; 1078 1079 nla = nla_reserve(skb, RTA_VIA, alen + 2); 1080 if (!nla) 1081 return -EMSGSIZE; 1082 1083 if (table <= NEIGH_NR_TABLES) 1084 family = table_to_family[table]; 1085 1086 via = nla_data(nla); 1087 via->rtvia_family = family; 1088 memcpy(via->rtvia_addr, addr, alen); 1089 return 0; 1090 } 1091 1092 int nla_put_labels(struct sk_buff *skb, int attrtype, 1093 u8 labels, const u32 label[]) 1094 { 1095 struct nlattr *nla; 1096 struct mpls_shim_hdr *nla_label; 1097 bool bos; 1098 int i; 1099 nla = nla_reserve(skb, attrtype, labels*4); 1100 if (!nla) 1101 return -EMSGSIZE; 1102 1103 nla_label = nla_data(nla); 1104 bos = true; 1105 for (i = labels - 1; i >= 0; i--) { 1106 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos); 1107 bos = false; 1108 } 1109 1110 return 0; 1111 } 1112 EXPORT_SYMBOL_GPL(nla_put_labels); 1113 1114 int nla_get_labels(const struct nlattr *nla, 1115 u32 max_labels, u8 *labels, u32 label[]) 1116 { 1117 unsigned len = nla_len(nla); 1118 unsigned nla_labels; 1119 struct mpls_shim_hdr *nla_label; 1120 bool bos; 1121 int i; 1122 1123 /* len needs to be an even multiple of 4 (the label size) */ 1124 if (len & 3) 1125 return -EINVAL; 1126 1127 /* Limit the number of new labels allowed */ 1128 nla_labels = len/4; 1129 if (nla_labels > max_labels) 1130 return -EINVAL; 1131 1132 nla_label = nla_data(nla); 1133 bos = true; 1134 for (i = nla_labels - 1; i >= 0; i--, bos = false) { 1135 struct mpls_entry_decoded dec; 1136 dec = mpls_entry_decode(nla_label + i); 1137 1138 /* Ensure the bottom of stack flag is properly set 1139 * and ttl and tc are both clear. 1140 */ 1141 if ((dec.bos != bos) || dec.ttl || dec.tc) 1142 return -EINVAL; 1143 1144 switch (dec.label) { 1145 case MPLS_LABEL_IMPLNULL: 1146 /* RFC3032: This is a label that an LSR may 1147 * assign and distribute, but which never 1148 * actually appears in the encapsulation. 1149 */ 1150 return -EINVAL; 1151 } 1152 1153 label[i] = dec.label; 1154 } 1155 *labels = nla_labels; 1156 return 0; 1157 } 1158 EXPORT_SYMBOL_GPL(nla_get_labels); 1159 1160 int nla_get_via(const struct nlattr *nla, u8 *via_alen, 1161 u8 *via_table, u8 via_addr[]) 1162 { 1163 struct rtvia *via = nla_data(nla); 1164 int err = -EINVAL; 1165 int alen; 1166 1167 if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) 1168 goto errout; 1169 alen = nla_len(nla) - 1170 offsetof(struct rtvia, rtvia_addr); 1171 if (alen > MAX_VIA_ALEN) 1172 goto errout; 1173 1174 /* Validate the address family */ 1175 switch (via->rtvia_family) { 1176 case AF_PACKET: 1177 *via_table = NEIGH_LINK_TABLE; 1178 break; 1179 case AF_INET: 1180 *via_table = NEIGH_ARP_TABLE; 1181 if (alen != 4) 1182 goto errout; 1183 break; 1184 case AF_INET6: 1185 *via_table = NEIGH_ND_TABLE; 1186 if (alen != 16) 1187 goto errout; 1188 break; 1189 default: 1190 /* Unsupported address family */ 1191 goto errout; 1192 } 1193 1194 memcpy(via_addr, via->rtvia_addr, alen); 1195 *via_alen = alen; 1196 err = 0; 1197 1198 errout: 1199 return err; 1200 } 1201 1202 static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, 1203 struct mpls_route_config *cfg) 1204 { 1205 struct rtmsg *rtm; 1206 struct nlattr *tb[RTA_MAX+1]; 1207 int index; 1208 int err; 1209 1210 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); 1211 if (err < 0) 1212 goto errout; 1213 1214 err = -EINVAL; 1215 rtm = nlmsg_data(nlh); 1216 memset(cfg, 0, sizeof(*cfg)); 1217 1218 if (rtm->rtm_family != AF_MPLS) 1219 goto errout; 1220 if (rtm->rtm_dst_len != 20) 1221 goto errout; 1222 if (rtm->rtm_src_len != 0) 1223 goto errout; 1224 if (rtm->rtm_tos != 0) 1225 goto errout; 1226 if (rtm->rtm_table != RT_TABLE_MAIN) 1227 goto errout; 1228 /* Any value is acceptable for rtm_protocol */ 1229 1230 /* As mpls uses destination specific addresses 1231 * (or source specific address in the case of multicast) 1232 * all addresses have universal scope. 1233 */ 1234 if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) 1235 goto errout; 1236 if (rtm->rtm_type != RTN_UNICAST) 1237 goto errout; 1238 if (rtm->rtm_flags != 0) 1239 goto errout; 1240 1241 cfg->rc_label = LABEL_NOT_SPECIFIED; 1242 cfg->rc_protocol = rtm->rtm_protocol; 1243 cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; 1244 cfg->rc_nlflags = nlh->nlmsg_flags; 1245 cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; 1246 cfg->rc_nlinfo.nlh = nlh; 1247 cfg->rc_nlinfo.nl_net = sock_net(skb->sk); 1248 1249 for (index = 0; index <= RTA_MAX; index++) { 1250 struct nlattr *nla = tb[index]; 1251 if (!nla) 1252 continue; 1253 1254 switch(index) { 1255 case RTA_OIF: 1256 cfg->rc_ifindex = nla_get_u32(nla); 1257 break; 1258 case RTA_NEWDST: 1259 if (nla_get_labels(nla, MAX_NEW_LABELS, 1260 &cfg->rc_output_labels, 1261 cfg->rc_output_label)) 1262 goto errout; 1263 break; 1264 case RTA_DST: 1265 { 1266 u8 label_count; 1267 if (nla_get_labels(nla, 1, &label_count, 1268 &cfg->rc_label)) 1269 goto errout; 1270 1271 /* Reserved labels may not be set */ 1272 if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED) 1273 goto errout; 1274 1275 break; 1276 } 1277 case RTA_VIA: 1278 { 1279 if (nla_get_via(nla, &cfg->rc_via_alen, 1280 &cfg->rc_via_table, cfg->rc_via)) 1281 goto errout; 1282 break; 1283 } 1284 case RTA_MULTIPATH: 1285 { 1286 cfg->rc_mp = nla_data(nla); 1287 cfg->rc_mp_len = nla_len(nla); 1288 break; 1289 } 1290 default: 1291 /* Unsupported attribute */ 1292 goto errout; 1293 } 1294 } 1295 1296 err = 0; 1297 errout: 1298 return err; 1299 } 1300 1301 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) 1302 { 1303 struct mpls_route_config cfg; 1304 int err; 1305 1306 err = rtm_to_route_config(skb, nlh, &cfg); 1307 if (err < 0) 1308 return err; 1309 1310 return mpls_route_del(&cfg); 1311 } 1312 1313 1314 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 1315 { 1316 struct mpls_route_config cfg; 1317 int err; 1318 1319 err = rtm_to_route_config(skb, nlh, &cfg); 1320 if (err < 0) 1321 return err; 1322 1323 return mpls_route_add(&cfg); 1324 } 1325 1326 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, 1327 u32 label, struct mpls_route *rt, int flags) 1328 { 1329 struct net_device *dev; 1330 struct nlmsghdr *nlh; 1331 struct rtmsg *rtm; 1332 1333 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 1334 if (nlh == NULL) 1335 return -EMSGSIZE; 1336 1337 rtm = nlmsg_data(nlh); 1338 rtm->rtm_family = AF_MPLS; 1339 rtm->rtm_dst_len = 20; 1340 rtm->rtm_src_len = 0; 1341 rtm->rtm_tos = 0; 1342 rtm->rtm_table = RT_TABLE_MAIN; 1343 rtm->rtm_protocol = rt->rt_protocol; 1344 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 1345 rtm->rtm_type = RTN_UNICAST; 1346 rtm->rtm_flags = 0; 1347 1348 if (nla_put_labels(skb, RTA_DST, 1, &label)) 1349 goto nla_put_failure; 1350 if (rt->rt_nhn == 1) { 1351 const struct mpls_nh *nh = rt->rt_nh; 1352 1353 if (nh->nh_labels && 1354 nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, 1355 nh->nh_label)) 1356 goto nla_put_failure; 1357 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC && 1358 nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), 1359 nh->nh_via_alen)) 1360 goto nla_put_failure; 1361 dev = rtnl_dereference(nh->nh_dev); 1362 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) 1363 goto nla_put_failure; 1364 if (nh->nh_flags & RTNH_F_LINKDOWN) 1365 rtm->rtm_flags |= RTNH_F_LINKDOWN; 1366 if (nh->nh_flags & RTNH_F_DEAD) 1367 rtm->rtm_flags |= RTNH_F_DEAD; 1368 } else { 1369 struct rtnexthop *rtnh; 1370 struct nlattr *mp; 1371 int dead = 0; 1372 int linkdown = 0; 1373 1374 mp = nla_nest_start(skb, RTA_MULTIPATH); 1375 if (!mp) 1376 goto nla_put_failure; 1377 1378 for_nexthops(rt) { 1379 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1380 if (!rtnh) 1381 goto nla_put_failure; 1382 1383 dev = rtnl_dereference(nh->nh_dev); 1384 if (dev) 1385 rtnh->rtnh_ifindex = dev->ifindex; 1386 if (nh->nh_flags & RTNH_F_LINKDOWN) { 1387 rtnh->rtnh_flags |= RTNH_F_LINKDOWN; 1388 linkdown++; 1389 } 1390 if (nh->nh_flags & RTNH_F_DEAD) { 1391 rtnh->rtnh_flags |= RTNH_F_DEAD; 1392 dead++; 1393 } 1394 1395 if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, 1396 nh->nh_labels, 1397 nh->nh_label)) 1398 goto nla_put_failure; 1399 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC && 1400 nla_put_via(skb, nh->nh_via_table, 1401 mpls_nh_via(rt, nh), 1402 nh->nh_via_alen)) 1403 goto nla_put_failure; 1404 1405 /* length of rtnetlink header + attributes */ 1406 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 1407 } endfor_nexthops(rt); 1408 1409 if (linkdown == rt->rt_nhn) 1410 rtm->rtm_flags |= RTNH_F_LINKDOWN; 1411 if (dead == rt->rt_nhn) 1412 rtm->rtm_flags |= RTNH_F_DEAD; 1413 1414 nla_nest_end(skb, mp); 1415 } 1416 1417 nlmsg_end(skb, nlh); 1418 return 0; 1419 1420 nla_put_failure: 1421 nlmsg_cancel(skb, nlh); 1422 return -EMSGSIZE; 1423 } 1424 1425 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) 1426 { 1427 struct net *net = sock_net(skb->sk); 1428 struct mpls_route __rcu **platform_label; 1429 size_t platform_labels; 1430 unsigned int index; 1431 1432 ASSERT_RTNL(); 1433 1434 index = cb->args[0]; 1435 if (index < MPLS_LABEL_FIRST_UNRESERVED) 1436 index = MPLS_LABEL_FIRST_UNRESERVED; 1437 1438 platform_label = rtnl_dereference(net->mpls.platform_label); 1439 platform_labels = net->mpls.platform_labels; 1440 for (; index < platform_labels; index++) { 1441 struct mpls_route *rt; 1442 rt = rtnl_dereference(platform_label[index]); 1443 if (!rt) 1444 continue; 1445 1446 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, 1447 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1448 index, rt, NLM_F_MULTI) < 0) 1449 break; 1450 } 1451 cb->args[0] = index; 1452 1453 return skb->len; 1454 } 1455 1456 static inline size_t lfib_nlmsg_size(struct mpls_route *rt) 1457 { 1458 size_t payload = 1459 NLMSG_ALIGN(sizeof(struct rtmsg)) 1460 + nla_total_size(4); /* RTA_DST */ 1461 1462 if (rt->rt_nhn == 1) { 1463 struct mpls_nh *nh = rt->rt_nh; 1464 1465 if (nh->nh_dev) 1466 payload += nla_total_size(4); /* RTA_OIF */ 1467 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */ 1468 payload += nla_total_size(2 + nh->nh_via_alen); 1469 if (nh->nh_labels) /* RTA_NEWDST */ 1470 payload += nla_total_size(nh->nh_labels * 4); 1471 } else { 1472 /* each nexthop is packed in an attribute */ 1473 size_t nhsize = 0; 1474 1475 for_nexthops(rt) { 1476 nhsize += nla_total_size(sizeof(struct rtnexthop)); 1477 /* RTA_VIA */ 1478 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) 1479 nhsize += nla_total_size(2 + nh->nh_via_alen); 1480 if (nh->nh_labels) 1481 nhsize += nla_total_size(nh->nh_labels * 4); 1482 } endfor_nexthops(rt); 1483 /* nested attribute */ 1484 payload += nla_total_size(nhsize); 1485 } 1486 1487 return payload; 1488 } 1489 1490 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 1491 struct nlmsghdr *nlh, struct net *net, u32 portid, 1492 unsigned int nlm_flags) 1493 { 1494 struct sk_buff *skb; 1495 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1496 int err = -ENOBUFS; 1497 1498 skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); 1499 if (skb == NULL) 1500 goto errout; 1501 1502 err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags); 1503 if (err < 0) { 1504 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ 1505 WARN_ON(err == -EMSGSIZE); 1506 kfree_skb(skb); 1507 goto errout; 1508 } 1509 rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL); 1510 1511 return; 1512 errout: 1513 if (err < 0) 1514 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); 1515 } 1516 1517 static int resize_platform_label_table(struct net *net, size_t limit) 1518 { 1519 size_t size = sizeof(struct mpls_route *) * limit; 1520 size_t old_limit; 1521 size_t cp_size; 1522 struct mpls_route __rcu **labels = NULL, **old; 1523 struct mpls_route *rt0 = NULL, *rt2 = NULL; 1524 unsigned index; 1525 1526 if (size) { 1527 labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); 1528 if (!labels) 1529 labels = vzalloc(size); 1530 1531 if (!labels) 1532 goto nolabels; 1533 } 1534 1535 /* In case the predefined labels need to be populated */ 1536 if (limit > MPLS_LABEL_IPV4NULL) { 1537 struct net_device *lo = net->loopback_dev; 1538 rt0 = mpls_rt_alloc(1, lo->addr_len); 1539 if (!rt0) 1540 goto nort0; 1541 RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); 1542 rt0->rt_protocol = RTPROT_KERNEL; 1543 rt0->rt_payload_type = MPT_IPV4; 1544 rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; 1545 rt0->rt_nh->nh_via_alen = lo->addr_len; 1546 memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, 1547 lo->addr_len); 1548 } 1549 if (limit > MPLS_LABEL_IPV6NULL) { 1550 struct net_device *lo = net->loopback_dev; 1551 rt2 = mpls_rt_alloc(1, lo->addr_len); 1552 if (!rt2) 1553 goto nort2; 1554 RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); 1555 rt2->rt_protocol = RTPROT_KERNEL; 1556 rt2->rt_payload_type = MPT_IPV6; 1557 rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; 1558 rt2->rt_nh->nh_via_alen = lo->addr_len; 1559 memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, 1560 lo->addr_len); 1561 } 1562 1563 rtnl_lock(); 1564 /* Remember the original table */ 1565 old = rtnl_dereference(net->mpls.platform_label); 1566 old_limit = net->mpls.platform_labels; 1567 1568 /* Free any labels beyond the new table */ 1569 for (index = limit; index < old_limit; index++) 1570 mpls_route_update(net, index, NULL, NULL); 1571 1572 /* Copy over the old labels */ 1573 cp_size = size; 1574 if (old_limit < limit) 1575 cp_size = old_limit * sizeof(struct mpls_route *); 1576 1577 memcpy(labels, old, cp_size); 1578 1579 /* If needed set the predefined labels */ 1580 if ((old_limit <= MPLS_LABEL_IPV6NULL) && 1581 (limit > MPLS_LABEL_IPV6NULL)) { 1582 RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2); 1583 rt2 = NULL; 1584 } 1585 1586 if ((old_limit <= MPLS_LABEL_IPV4NULL) && 1587 (limit > MPLS_LABEL_IPV4NULL)) { 1588 RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0); 1589 rt0 = NULL; 1590 } 1591 1592 /* Update the global pointers */ 1593 net->mpls.platform_labels = limit; 1594 rcu_assign_pointer(net->mpls.platform_label, labels); 1595 1596 rtnl_unlock(); 1597 1598 mpls_rt_free(rt2); 1599 mpls_rt_free(rt0); 1600 1601 if (old) { 1602 synchronize_rcu(); 1603 kvfree(old); 1604 } 1605 return 0; 1606 1607 nort2: 1608 mpls_rt_free(rt0); 1609 nort0: 1610 kvfree(labels); 1611 nolabels: 1612 return -ENOMEM; 1613 } 1614 1615 static int mpls_platform_labels(struct ctl_table *table, int write, 1616 void __user *buffer, size_t *lenp, loff_t *ppos) 1617 { 1618 struct net *net = table->data; 1619 int platform_labels = net->mpls.platform_labels; 1620 int ret; 1621 struct ctl_table tmp = { 1622 .procname = table->procname, 1623 .data = &platform_labels, 1624 .maxlen = sizeof(int), 1625 .mode = table->mode, 1626 .extra1 = &zero, 1627 .extra2 = &label_limit, 1628 }; 1629 1630 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 1631 1632 if (write && ret == 0) 1633 ret = resize_platform_label_table(net, platform_labels); 1634 1635 return ret; 1636 } 1637 1638 static const struct ctl_table mpls_table[] = { 1639 { 1640 .procname = "platform_labels", 1641 .data = NULL, 1642 .maxlen = sizeof(int), 1643 .mode = 0644, 1644 .proc_handler = mpls_platform_labels, 1645 }, 1646 { } 1647 }; 1648 1649 static int mpls_net_init(struct net *net) 1650 { 1651 struct ctl_table *table; 1652 1653 net->mpls.platform_labels = 0; 1654 net->mpls.platform_label = NULL; 1655 1656 table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); 1657 if (table == NULL) 1658 return -ENOMEM; 1659 1660 table[0].data = net; 1661 net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); 1662 if (net->mpls.ctl == NULL) { 1663 kfree(table); 1664 return -ENOMEM; 1665 } 1666 1667 return 0; 1668 } 1669 1670 static void mpls_net_exit(struct net *net) 1671 { 1672 struct mpls_route __rcu **platform_label; 1673 size_t platform_labels; 1674 struct ctl_table *table; 1675 unsigned int index; 1676 1677 table = net->mpls.ctl->ctl_table_arg; 1678 unregister_net_sysctl_table(net->mpls.ctl); 1679 kfree(table); 1680 1681 /* An rcu grace period has passed since there was a device in 1682 * the network namespace (and thus the last in flight packet) 1683 * left this network namespace. This is because 1684 * unregister_netdevice_many and netdev_run_todo has completed 1685 * for each network device that was in this network namespace. 1686 * 1687 * As such no additional rcu synchronization is necessary when 1688 * freeing the platform_label table. 1689 */ 1690 rtnl_lock(); 1691 platform_label = rtnl_dereference(net->mpls.platform_label); 1692 platform_labels = net->mpls.platform_labels; 1693 for (index = 0; index < platform_labels; index++) { 1694 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 1695 RCU_INIT_POINTER(platform_label[index], NULL); 1696 mpls_rt_free(rt); 1697 } 1698 rtnl_unlock(); 1699 1700 kvfree(platform_label); 1701 } 1702 1703 static struct pernet_operations mpls_net_ops = { 1704 .init = mpls_net_init, 1705 .exit = mpls_net_exit, 1706 }; 1707 1708 static int __init mpls_init(void) 1709 { 1710 int err; 1711 1712 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4); 1713 1714 err = register_pernet_subsys(&mpls_net_ops); 1715 if (err) 1716 goto out; 1717 1718 err = register_netdevice_notifier(&mpls_dev_notifier); 1719 if (err) 1720 goto out_unregister_pernet; 1721 1722 dev_add_pack(&mpls_packet_type); 1723 1724 rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); 1725 rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); 1726 rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); 1727 err = 0; 1728 out: 1729 return err; 1730 1731 out_unregister_pernet: 1732 unregister_pernet_subsys(&mpls_net_ops); 1733 goto out; 1734 } 1735 module_init(mpls_init); 1736 1737 static void __exit mpls_exit(void) 1738 { 1739 rtnl_unregister_all(PF_MPLS); 1740 dev_remove_pack(&mpls_packet_type); 1741 unregister_netdevice_notifier(&mpls_dev_notifier); 1742 unregister_pernet_subsys(&mpls_net_ops); 1743 } 1744 module_exit(mpls_exit); 1745 1746 MODULE_DESCRIPTION("MultiProtocol Label Switching"); 1747 MODULE_LICENSE("GPL v2"); 1748 MODULE_ALIAS_NETPROTO(PF_MPLS); 1749