// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  SR-IPv6 implementation
 *
 *  Authors:
 *  David Lebrun <david.lebrun@uclouvain.be>
 *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
 */

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_local.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#include <net/ip_tunnels.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
#include <linux/netfilter.h>

/* map a SEG6_LOCAL_* netlink attribute id to its bit in the attrs/optattrs
 * bitmasks used by struct seg6_action_desc below
 */
#define SEG6_F_ATTR(i) BIT(i)

struct seg6_local_lwt;

/* callbacks used for customizing the creation and destruction of a behavior */
struct seg6_local_lwtunnel_ops {
	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
			   struct netlink_ext_ack *extack);
	void (*destroy_state)(struct seg6_local_lwt *slwt);
};

/* static description of one SRv6 behavior: which attributes it needs, its
 * packet handler and its (optional) build/destroy callbacks
 */
struct seg6_action_desc {
	int action;
	unsigned long attrs;

	/* The optattrs field is used for specifying all the optional
	 * attributes supported by a specific behavior.
	 * It means that if one of these attributes is not provided in the
	 * netlink message during the behavior creation, no errors will be
	 * returned to the userspace.
	 *
	 * Each attribute can be only of two types (mutually exclusive):
	 * 1) required or 2) optional.
	 * Every user MUST obey to this rule! If you set an attribute as
	 * required the same attribute CANNOT be set as optional and vice
	 * versa.
	 */
	unsigned long optattrs;

	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* extra skb headroom (bytes) the behavior needs, e.g. for pushing an
	 * outer IPv6 header (see the End.B6.Encaps table entry)
	 */
	int static_headroom;

	struct seg6_local_lwtunnel_ops slwt_ops;
};

/* eBPF program attached to an End.BPF behavior, plus its user-visible name */
struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};

enum seg6_end_dt_mode {
	DT_INVALID_MODE	= -EINVAL,
	DT_LEGACY_MODE	= 0,
	DT_VRF_MODE	= 1,
};

struct seg6_end_dt_info {
	enum seg6_end_dt_mode mode;

	struct net *net;
	/* VRF device associated to the routing table used by the SRv6
	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
	 */
	int vrf_ifindex;
	int vrf_table;

	/* tunneled packet family (IPv4 or IPv6).
	 * Protocol and header length are inferred from family.
	 */
	u16 family;
};

/* per-CPU counters for a single behavior instance */
struct pcpu_seg6_local_counters {
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t errors;

	struct u64_stats_sync syncp;
};

/* This struct groups all the SRv6 Behavior counters supported so far.
 *
 * put_nla_counters() makes use of this data structure to collect all counter
 * values after the per-CPU counter evaluation has been performed.
 * Finally, each counter value (in seg6_local_counters) is stored in the
 * corresponding netlink attribute and sent to user space.
 *
 * NB: we don't want to expose this structure to user space!
 */
struct seg6_local_counters {
	__u64 packets;
	__u64 bytes;
	__u64 errors;
};

/* allocate zeroed per-CPU counters (syncp initialized by the helper) */
#define seg6_local_alloc_pcpu_counters(__gfp) \
	__netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \
				  ((__gfp) | __GFP_ZERO))

#define SEG6_F_LOCAL_COUNTERS	SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)

/* parsed configuration and runtime state of one seg6local route instance */
struct seg6_local_lwt {
	int action;
	struct ipv6_sr_hdr *srh;
	int table;
	struct in_addr nh4;
	struct in6_addr nh6;
	int iif;
	int oif;
	struct bpf_lwt_prog bpf;
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct seg6_end_dt_info dt_info;
#endif
	struct pcpu_seg6_local_counters __percpu *pcpu_counters;

	int headroom;
	struct seg6_action_desc *desc;
	/* unlike the required attrs, we have to track the optional attributes
	 * that have been effectively parsed.
	 */
	unsigned long parsed_optattrs;
};

static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct seg6_local_lwt *)lwt->data;
}

/* locate and validate the SRH of @skb; returns NULL when the routing header
 * is absent, cannot be pulled, or fails seg6_validate_srh()
 */
static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb, int flags)
{
	struct ipv6_sr_hdr *srh;
	int len, srhoff = 0;

	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0)
		return NULL;

	if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
		return NULL;

	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

	len = (srh->hdrlen + 1) << 3;

	if (!pskb_may_pull(skb, srhoff + len))
		return NULL;

	/* note that pskb_may_pull may change pointers in header;
	 * for this reason it is necessary to reload them when needed.
	 */
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

	if (!seg6_validate_srh(srh, len, true))
		return NULL;

	return srh;
}

/* like get_srh(), but additionally checks the HMAC TLV when HMAC support
 * is compiled in
 */
static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
{
	struct ipv6_sr_hdr *srh;

	srh = get_srh(skb, IP6_FH_F_SKIP_RH);
	if (!srh)
		return NULL;

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (!seg6_hmac_validate_skb(skb))
		return NULL;
#endif

	return srh;
}

/* strip the outer headers up to @proto and prepare the skb for the inner
 * packet; an SRH with segments_left > 0 (or a bad HMAC) rejects decap
 */
static bool decap_and_validate(struct sk_buff *skb, int proto)
{
	struct ipv6_sr_hdr *srh;
	unsigned int off = 0;

	srh = get_srh(skb, 0);
	if (srh && srh->segments_left > 0)
		return false;

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (srh && !seg6_hmac_validate_skb(skb))
		return false;
#endif

	if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
		return false;

	if (!pskb_pull(skb, off))
		return false;

	skb_postpull_rcsum(skb, skb_network_header(skb), off);

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	if (iptunnel_pull_offloads(skb))
		return false;

	return true;
}

/* consume one segment: decrement segments_left and copy the now-active
 * segment into the outer IPv6 destination address
 */
static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
{
	struct in6_addr *addr;

	srh->segments_left--;
	addr = srh->segments + srh->segments_left;
	*daddr = *addr;
}

/* route @skb towards @nhaddr (or the packet's DA when NULL) in table
 * @tbl_id (main table when 0); on failure the blackhole dst is attached
 */
static int
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			u32 tbl_id, bool local_delivery)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	/* NOTE(review): fl6 is not zeroed before the field assignments below;
	 * confirm the remaining flowi6 fields are ignored by this lookup path.
	 */
	struct flowi6 fl6;
	int dev_flags = 0;

	fl6.flowi6_iif = skb->dev->ifindex;
	fl6.daddr = nhaddr ?
		    *nhaddr : hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = hdr->nexthdr;

	if (nhaddr)
		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

	if (!tbl_id) {
		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
	} else {
		struct fib6_table *table;

		table = fib6_get_table(net, tbl_id);
		if (!table)
			goto out;

		rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
		dst = &rt->dst;
	}

	/* we want to discard traffic destined for local packet processing,
	 * if @local_delivery is set to false.
	 */
	if (!local_delivery)
		dev_flags |= IFF_LOOPBACK;

	if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
		dst_release(dst);
		dst = NULL;
	}

out:
	/* no usable route: attach the blackhole entry so dst_input() drops */
	if (!dst) {
		rt = net->ipv6.ip6_blk_hole_entry;
		dst = &rt->dst;
		dst_hold(dst);
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	return dst->error;
}

/* nexthop lookup that never allows local delivery (forwarding only) */
int seg6_lookup_nexthop(struct sk_buff *skb,
			struct in6_addr *nhaddr, u32 tbl_id)
{
	return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
}

/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	seg6_lookup_nexthop(skb, &slwt->nh6, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

/* regular endpoint, with the lookup done in the configured table (End.T) */
static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	seg6_lookup_nexthop(skb, NULL, slwt->table);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

/* decapsulate and forward inner L2 frame on specified interface */
static int input_action_end_dx2(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *odev;
	struct ethhdr *eth;

	if (!decap_and_validate(skb, IPPROTO_ETHERNET))
		goto drop;

	if (!pskb_may_pull(skb, ETH_HLEN))
		goto drop;

	skb_reset_mac_header(skb);
	eth = (struct ethhdr *)skb->data;

	/* To determine the frame's protocol, we assume it is 802.3. This avoids
	 * a call to eth_type_trans(), which is not really relevant for our
	 * use case.
	 */
	if (!eth_proto_is_802_3(eth->h_proto))
		goto drop;

	odev = dev_get_by_index_rcu(net, slwt->oif);
	if (!odev)
		goto drop;

	/* As we accept Ethernet frames, make sure the egress device is of
	 * the correct type.
	 */
	if (odev->type != ARPHRD_ETHER)
		goto drop;

	if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
		goto drop;

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto drop;

	skb_forward_csum(skb);

	/* frame must fit on the egress link (skb->len still includes the
	 * Ethernet header here)
	 */
	if (skb->len - ETH_HLEN > odev->mtu)
		goto drop;

	skb->dev = odev;
	skb->protocol = eth->h_proto;

	return dev_queue_xmit(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
				       struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct in6_addr *nhaddr = NULL;
	struct seg6_local_lwt *slwt;

	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);

	/* The inner packet is not associated to any local interface,
	 * so we do not call netif_rx().
	 *
	 * If slwt->nh6 is set to ::, then lookup the nexthop for the
	 * inner packet's DA. Otherwise, use the specified nexthop.
	 */
	if (!ipv6_addr_any(&slwt->nh6))
		nhaddr = &slwt->nh6;

	seg6_lookup_nexthop(skb, nhaddr, 0);

	return dst_input(skb);
}

/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	/* this function accepts IPv6 encapsulated packets, with either
	 * an SRH with SL=0, or no SRH.
	 */

	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	nf_reset_ct(skb);

	/* run the inner packet through the PRE_ROUTING netfilter hook when
	 * lwtunnel netfilter hooks are enabled
	 */
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst(skb)->dev, input_action_end_dx6_finish);

	return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
				       struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct seg6_local_lwt *slwt;
	struct iphdr *iph;
	__be32 nhaddr;
	int err;

	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);

	iph = ip_hdr(skb);

	/* use the configured IPv4 nexthop, or the inner DA when unset */
	nhaddr = slwt->nh4.s_addr ?: iph->daddr;

	skb_dst_drop(skb);

	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
	if (err) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst_input(skb);
}

/* decapsulate the inner IPv4 packet and forward it to specified nexthop */
static int input_action_end_dx4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPIP))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	skb->protocol = htons(ETH_P_IP);
	skb_set_transport_header(skb, sizeof(struct iphdr));
	nf_reset_ct(skb);

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst(skb)->dev, input_action_end_dx4_finish);

	return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

#ifdef CONFIG_NET_L3_MASTER_DEV
/* netns that the behavior is being created in, taken from the fib config */
static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
{
	const struct nl_info *nli =
&fib6_cfg->fc_nlinfo; 522 523 return nli->nl_net; 524 } 525 526 static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, 527 u16 family, struct netlink_ext_ack *extack) 528 { 529 struct seg6_end_dt_info *info = &slwt->dt_info; 530 int vrf_ifindex; 531 struct net *net; 532 533 net = fib6_config_get_net(cfg); 534 535 /* note that vrf_table was already set by parse_nla_vrftable() */ 536 vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, 537 info->vrf_table); 538 if (vrf_ifindex < 0) { 539 if (vrf_ifindex == -EPERM) { 540 NL_SET_ERR_MSG(extack, 541 "Strict mode for VRF is disabled"); 542 } else if (vrf_ifindex == -ENODEV) { 543 NL_SET_ERR_MSG(extack, 544 "Table has no associated VRF device"); 545 } else { 546 pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", 547 vrf_ifindex); 548 } 549 550 return vrf_ifindex; 551 } 552 553 info->net = net; 554 info->vrf_ifindex = vrf_ifindex; 555 556 info->family = family; 557 info->mode = DT_VRF_MODE; 558 559 return 0; 560 } 561 562 /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and 563 * routes the IPv4/IPv6 packet by looking at the configured routing table. 564 * 565 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment 566 * Routing Header packets) from several interfaces and the outer IPv6 567 * destination address (DA) is used for retrieving the specific instance of the 568 * End.DT4/DT6 behavior that should process the packets. 569 * 570 * However, the inner IPv4/IPv6 packet is not really bound to any receiving 571 * interface and thus the End.DT4/DT6 sets the VRF (associated with the 572 * corresponding routing table) as the *receiving* interface. 573 * In other words, the End.DT4/DT6 processes a packet as if it has been received 574 * directly by the VRF (and not by one of its slave devices, if any). 
575 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in 576 * according to the routing table configured by the End.DT4/DT6 instance. 577 * 578 * This design allows you to get some interesting features like: 579 * 1) the statistics on rx packets; 580 * 2) the possibility to install a packet sniffer on the receiving interface 581 * (the VRF one) for looking at the incoming packets; 582 * 3) the possibility to leverage the netfilter prerouting hook for the inner 583 * IPv4 packet. 584 * 585 * This function returns: 586 * - the sk_buff* when the VRF rcv handler has processed the packet correctly; 587 * - NULL when the skb is consumed by the VRF rcv handler; 588 * - a pointer which encodes a negative error number in case of error. 589 * Note that in this case, the function takes care of freeing the skb. 590 */ 591 static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family, 592 struct net_device *dev) 593 { 594 /* based on l3mdev_ip_rcv; we are only interested in the master */ 595 if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev))) 596 goto drop; 597 598 if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv)) 599 goto drop; 600 601 /* the decap packet IPv4/IPv6 does not come with any mac header info. 602 * We must unset the mac header to allow the VRF device to rebuild it, 603 * just in case there is a sniffer attached on the device. 604 */ 605 skb_unset_mac_header(skb); 606 607 skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family); 608 if (!skb) 609 /* the skb buffer was consumed by the handler */ 610 return NULL; 611 612 /* when a packet is received by a VRF or by one of its slaves, the 613 * master device reference is set into the skb. 
614 */ 615 if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex)) 616 goto drop; 617 618 return skb; 619 620 drop: 621 kfree_skb(skb); 622 return ERR_PTR(-EINVAL); 623 } 624 625 static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb, 626 struct seg6_end_dt_info *info) 627 { 628 int vrf_ifindex = info->vrf_ifindex; 629 struct net *net = info->net; 630 631 if (unlikely(vrf_ifindex < 0)) 632 goto error; 633 634 if (unlikely(!net_eq(dev_net(skb->dev), net))) 635 goto error; 636 637 return dev_get_by_index_rcu(net, vrf_ifindex); 638 639 error: 640 return NULL; 641 } 642 643 static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, 644 struct seg6_local_lwt *slwt, u16 family) 645 { 646 struct seg6_end_dt_info *info = &slwt->dt_info; 647 struct net_device *vrf; 648 __be16 protocol; 649 int hdrlen; 650 651 vrf = end_dt_get_vrf_rcu(skb, info); 652 if (unlikely(!vrf)) 653 goto drop; 654 655 switch (family) { 656 case AF_INET: 657 protocol = htons(ETH_P_IP); 658 hdrlen = sizeof(struct iphdr); 659 break; 660 case AF_INET6: 661 protocol = htons(ETH_P_IPV6); 662 hdrlen = sizeof(struct ipv6hdr); 663 break; 664 case AF_UNSPEC: 665 fallthrough; 666 default: 667 goto drop; 668 } 669 670 if (unlikely(info->family != AF_UNSPEC && info->family != family)) { 671 pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); 672 goto drop; 673 } 674 675 skb->protocol = protocol; 676 677 skb_dst_drop(skb); 678 679 skb_set_transport_header(skb, hdrlen); 680 nf_reset_ct(skb); 681 682 return end_dt_vrf_rcv(skb, family, vrf); 683 684 drop: 685 kfree_skb(skb); 686 return ERR_PTR(-EINVAL); 687 } 688 689 static int input_action_end_dt4(struct sk_buff *skb, 690 struct seg6_local_lwt *slwt) 691 { 692 struct iphdr *iph; 693 int err; 694 695 if (!decap_and_validate(skb, IPPROTO_IPIP)) 696 goto drop; 697 698 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 699 goto drop; 700 701 skb = end_dt_vrf_core(skb, slwt, AF_INET); 702 if (!skb) 703 /* packet has been processed and consumed by the 
		   VRF */
		return 0;

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	iph = ip_hdr(skb);

	err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
	if (unlikely(err))
		goto drop;

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
			      struct netlink_ext_ack *extack)
{
	return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
}

/* select legacy vs VRF mode: exactly one of the TABLE / VRFTABLE optional
 * attributes must have been supplied
 */
static enum
seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
{
	unsigned long parsed_optattrs = slwt->parsed_optattrs;
	bool legacy, vrfmode;

	legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
	vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));

	if (!(legacy ^ vrfmode))
		/* both are absent or present: invalid DT6 mode */
		return DT_INVALID_MODE;

	return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
}

static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;

	return info->mode;
}

static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
			      struct netlink_ext_ack *extack)
{
	enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
	struct seg6_end_dt_info *info = &slwt->dt_info;

	switch (mode) {
	case DT_LEGACY_MODE:
		info->mode = DT_LEGACY_MODE;
		return 0;
	case DT_VRF_MODE:
		return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
	default:
		NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
		return -EINVAL;
	}
}
#endif

/* End.DT6: decapsulate the inner IPv6 packet and route it, either through
 * the VRF (VRF mode) or by a plain lookup in slwt->table (legacy mode)
 */
static int input_action_end_dt6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

#ifdef CONFIG_NET_L3_MASTER_DEV
	if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
		goto legacy_mode;

	/* DT6_VRF_MODE */
	skb = end_dt_vrf_core(skb, slwt, AF_INET6);
	if (!skb)
		/* packet has been processed and consumed by the VRF */
		return 0;

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* note: this time we do not need to specify the table because the VRF
	 * takes care of selecting the correct table.
	 */
	seg6_lookup_any_nexthop(skb, NULL, 0, true);

	return dst_input(skb);

legacy_mode:
#endif
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

#ifdef CONFIG_NET_L3_MASTER_DEV
static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
			       struct netlink_ext_ack *extack)
{
	return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
}

/* End.DT46: dispatch on the inner protocol to the DT4 or DT6 handler */
static int input_action_end_dt46(struct sk_buff *skb,
				 struct seg6_local_lwt *slwt)
{
	unsigned int off = 0;
	int nexthdr;

	nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
	if (unlikely(nexthdr < 0))
		goto drop;

	switch (nexthdr) {
	case IPPROTO_IPIP:
		return input_action_end_dt4(skb, slwt);
	case IPPROTO_IPV6:
		return input_action_end_dt6(skb, slwt);
	}

drop:
	kfree_skb(skb);
	return -EINVAL;
}
#endif

/* push an SRH on top of the current one */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int err = -EINVAL;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	err = seg6_do_srh_inline(skb, slwt->srh);
	if (err)
		goto drop;

	/* the inserted SRH changed the packet length: refresh payload_len */
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, NULL, 0);
	return dst_input(skb);

drop:
	kfree_skb(skb);
	return err;
}

/* encapsulate within an outer IPv6 header and a specified SRH */
static int input_action_end_b6_encap(struct sk_buff *skb,
				     struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int err = -EINVAL;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	skb_reset_inner_headers(skb);
	skb->encapsulation = 1;

	err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
	if (err)
		goto drop;

	/* the new outer encapsulation changed the length: refresh payload_len */
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return err;
}

/* per-CPU scratch state shared with the bpf_lwt_seg6_* helpers */
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);

/* re-validate the SRH after a BPF program may have modified it */
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct ipv6_sr_hdr *srh = srh_state->srh;

	if (unlikely(srh == NULL))
		return false;

	if (unlikely(!srh_state->valid)) {
		/* SRH length must be a multiple of 8 bytes */
		if ((srh_state->hdrlen & 7) != 0)
			return false;

		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
		if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
			return false;

		srh_state->valid = true;
	}

	return true;
}

/* End.BPF: advance the segment list, then run the attached eBPF program */
static int input_action_end_bpf(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct ipv6_sr_hdr *srh;
	int ret;

	srh = get_and_validate_srh(skb);
	if (!srh) {
		kfree_skb(skb);
		return -EINVAL;
	}
	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
	 * which is also accessed by the bpf_lwt_seg6_* helpers
	 */
	preempt_disable();
	srh_state->srh = srh;
	srh_state->hdrlen = srh->hdrlen << 3;
	srh_state->valid = true;

	rcu_read_lock();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
	rcu_read_unlock();

	switch (ret) {
	case BPF_OK:
	case BPF_REDIRECT:
		break;
	case BPF_DROP:
		goto drop;
	default:
		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
		goto drop;
	}

	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
		goto drop;

	preempt_enable();
	if (ret != BPF_REDIRECT)
		seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	preempt_enable();
	kfree_skb(skb);
	return -EINVAL;
}

/* all supported behaviors with their required/optional attributes */
static struct seg6_action_desc seg6_action_table[] = {
	{
		.action		= SEG6_LOCAL_ACTION_END,
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_X,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_x,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_T,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_t,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX2,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_OIF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx2,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH4),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx4,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		=
				  input_action_end_dt4,
		.slwt_ops	= {
			.build_state = seg6_end_dt4_build,
		},
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS |
				  SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
				  SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.slwt_ops	= {
			.build_state = seg6_end_dt6_build,
		},
#else
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#endif
		.input		= input_action_end_dt6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT46,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt46,
		.slwt_ops	= {
			.build_state = seg6_end_dt46_build,
		},
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6_encap,
		.static_headroom = sizeof(struct ipv6hdr),
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_BPF,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_BPF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_bpf,
	},

};

/* linear scan of seg6_action_table for the descriptor of @action */
static struct seg6_action_desc *__get_action_desc(int action)
{
	struct seg6_action_desc *desc;
	int i, count;

	count = ARRAY_SIZE(seg6_action_table);
	for (i = 0; i < count; i++) {
		desc = &seg6_action_table[i];
		if (desc->action == action)
			return desc;
	}

	return NULL;
}

static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
{
	return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
}

/* bump packets/bytes on success, errors otherwise (per-CPU, u64_stats) */
static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
				       unsigned int len, int err)
{
	struct pcpu_seg6_local_counters *pcounters;

	pcounters = this_cpu_ptr(slwt->pcpu_counters);
	u64_stats_update_begin(&pcounters->syncp);

	if (likely(!err)) {
		u64_stats_inc(&pcounters->packets);
		u64_stats_add(&pcounters->bytes, len);
	} else {
		u64_stats_inc(&pcounters->errors);
	}

	u64_stats_update_end(&pcounters->syncp);
}

/* run the behavior's packet handler, then account the result; skb->len is
 * sampled before the handler because it may consume or modify the skb
 */
static int seg6_local_input_core(struct net *net, struct sock *sk,
				 struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct seg6_action_desc *desc;
	struct seg6_local_lwt *slwt;
	unsigned int len = skb->len;
	int rc;

	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
	desc = slwt->desc;

	rc = desc->input(skb, slwt);

	if (!seg6_lwtunnel_counters_enabled(slwt))
		return rc;

	seg6_local_update_counters(slwt, len, rc);

	return rc;
}

/* lwtunnel input entry point; only IPv6 packets are accepted */
static int seg6_local_input(struct sk_buff *skb)
{
	if (skb->protocol != htons(ETH_P_IPV6)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
			       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
			       seg6_local_input_core);

	return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
}

static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
				    .len = sizeof(struct in_addr) },
	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
				    .len = sizeof(struct in6_addr) },
	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_BPF]	= { .type
= NLA_NESTED }, 1163 [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED }, 1164 }; 1165 1166 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1167 { 1168 struct ipv6_sr_hdr *srh; 1169 int len; 1170 1171 srh = nla_data(attrs[SEG6_LOCAL_SRH]); 1172 len = nla_len(attrs[SEG6_LOCAL_SRH]); 1173 1174 /* SRH must contain at least one segment */ 1175 if (len < sizeof(*srh) + sizeof(struct in6_addr)) 1176 return -EINVAL; 1177 1178 if (!seg6_validate_srh(srh, len, false)) 1179 return -EINVAL; 1180 1181 slwt->srh = kmemdup(srh, len, GFP_KERNEL); 1182 if (!slwt->srh) 1183 return -ENOMEM; 1184 1185 slwt->headroom += len; 1186 1187 return 0; 1188 } 1189 1190 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1191 { 1192 struct ipv6_sr_hdr *srh; 1193 struct nlattr *nla; 1194 int len; 1195 1196 srh = slwt->srh; 1197 len = (srh->hdrlen + 1) << 3; 1198 1199 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len); 1200 if (!nla) 1201 return -EMSGSIZE; 1202 1203 memcpy(nla_data(nla), srh, len); 1204 1205 return 0; 1206 } 1207 1208 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1209 { 1210 int len = (a->srh->hdrlen + 1) << 3; 1211 1212 if (len != ((b->srh->hdrlen + 1) << 3)) 1213 return 1; 1214 1215 return memcmp(a->srh, b->srh, len); 1216 } 1217 1218 static void destroy_attr_srh(struct seg6_local_lwt *slwt) 1219 { 1220 kfree(slwt->srh); 1221 } 1222 1223 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1224 { 1225 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); 1226 1227 return 0; 1228 } 1229 1230 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1231 { 1232 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table)) 1233 return -EMSGSIZE; 1234 1235 return 0; 1236 } 1237 1238 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1239 { 1240 if (a->table != b->table) 1241 return 1; 1242 1243 return 0; 1244 } 1245 1246 static struct 1247 seg6_end_dt_info 
*seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	return &slwt->dt_info;
#else
	/* End.DT* VRF support depends on the L3 master device infra */
	return ERR_PTR(-EOPNOTSUPP);
#endif
}

static int parse_nla_vrftable(struct nlattr **attrs,
			      struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(info))
		return PTR_ERR(info);

	info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);

	return 0;
}

static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(info))
		return PTR_ERR(info);

	if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
	struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);

	/* no IS_ERR() check here: without CONFIG_NET_L3_MASTER_DEV the
	 * VRFTABLE attribute can never be parsed successfully, so this
	 * comparison is only reached with valid pointers
	 */
	if (info_a->vrf_table != info_b->vrf_table)
		return 1;

	return 0;
}

static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
	       sizeof(struct in_addr));

	return 0;
}

static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nla;

	nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
	if (!nla)
		return -EMSGSIZE;

	memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));

	return 0;
}

static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
}

static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
	       sizeof(struct in6_addr));

	return 0;
}

static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nla;

	nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
	if (!nla)
		return -EMSGSIZE;

	memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));

	return 0;
}

static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
}

static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);

	return 0;
}

static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (a->iif != b->iif)
		return 1;

	return 0;
}

static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);

	return 0;
}

static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (a->oif != b->oif)
		return 1;

	return 0;
}

#define MAX_PROG_NAME 256
/* nested policy for the SEG6_LOCAL_BPF attribute */
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				       .len = MAX_PROG_NAME },
};

static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
					  attrs[SEG6_LOCAL_BPF],
					  bpf_prog_policy, NULL);
	if (ret < 0)
		return ret;

	/* both the program fd and its name are mandatory */
	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
		return -EINVAL;

	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
	if (!slwt->bpf.name)
		return -ENOMEM;

	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
	if (IS_ERR(p)) {
		/* release the name copied just above; the attribute is not
		 * marked as parsed, so no destroy() callback will run for it
		 */
		kfree(slwt->bpf.name);
		return PTR_ERR(p);
	}

	slwt->bpf.prog = p;
	return 0;
}

/* dump the attached BPF program (id + name) as a nested attribute.
 * NOTE(review): no nla_nest_cancel() on the -EMSGSIZE paths — presumably
 * the caller discards the partially-filled message on error; confirm
 * against the lwtunnel fill_encap handling.
 */
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nest;

	if (!slwt->bpf.prog)
		return 0;

	nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
		return -EMSGSIZE;

	if (slwt->bpf.name &&
	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}

/* programs are compared by name only */
static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (!a->bpf.name && !b->bpf.name)
		return 0;

	if (!a->bpf.name || !b->bpf.name)
		return 1;

	return strcmp(a->bpf.name, b->bpf.name);
}

static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
{
	kfree(slwt->bpf.name);
	if (slwt->bpf.prog)
		bpf_prog_put(slwt->bpf.prog);
}

/* nested policy for the SEG6_LOCAL_COUNTERS attribute */
static const struct
nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
	[SEG6_LOCAL_CNT_PACKETS]	= { .type = NLA_U64 },
	[SEG6_LOCAL_CNT_BYTES]		= { .type = NLA_U64 },
	[SEG6_LOCAL_CNT_ERRORS]		= { .type = NLA_U64 },
};

static int parse_nla_counters(struct nlattr **attrs,
			      struct seg6_local_lwt *slwt)
{
	struct pcpu_seg6_local_counters __percpu *pcounters;
	struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
	int ret;

	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
					  attrs[SEG6_LOCAL_COUNTERS],
					  seg6_local_counters_policy, NULL);
	if (ret < 0)
		return ret;

	/* basic support for SRv6 Behavior counters requires at least:
	 * packets, bytes and errors.
	 */
	if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
	    !tb[SEG6_LOCAL_CNT_ERRORS])
		return -EINVAL;

	/* counters are always zero initialized */
	pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
	if (!pcounters)
		return -ENOMEM;

	slwt->pcpu_counters = pcounters;

	return 0;
}

/* store the (already aggregated) counter values into the corresponding
 * SEG6_LOCAL_CNT_* netlink attributes
 */
static int seg6_local_fill_nla_counters(struct sk_buff *skb,
					struct seg6_local_counters *counters)
{
	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
			      SEG6_LOCAL_CNT_PAD))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
			      SEG6_LOCAL_CNT_PAD))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
			      SEG6_LOCAL_CNT_PAD))
		return -EMSGSIZE;

	return 0;
}

/* aggregate the per-CPU counters and dump them as a nested
 * SEG6_LOCAL_COUNTERS attribute
 */
static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_local_counters counters = { 0, 0, 0 };
	struct nlattr *nest;
	int rc, i;

	nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
	if (!nest)
		return -EMSGSIZE;

	for_each_possible_cpu(i) {
		struct pcpu_seg6_local_counters *pcounters;
		u64 packets, bytes, errors;
		unsigned int start;

		pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
		do {
			start = u64_stats_fetch_begin_irq(&pcounters->syncp);

			packets = u64_stats_read(&pcounters->packets);
			bytes =
u64_stats_read(&pcounters->bytes);
			errors = u64_stats_read(&pcounters->errors);

		} while (u64_stats_fetch_retry_irq(&pcounters->syncp, start));

		counters.packets += packets;
		counters.bytes += bytes;
		counters.errors += errors;
	}

	rc = seg6_local_fill_nla_counters(skb, &counters);
	if (rc < 0) {
		nla_nest_cancel(skb, nest);
		return rc;
	}

	return nla_nest_end(skb, nest);
}

static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	/* a and b are equal if both have pcpu_counters set or not */
	return (!!((unsigned long)a->pcpu_counters)) ^
	       (!!((unsigned long)b->pcpu_counters));
}

static void destroy_attr_counters(struct seg6_local_lwt *slwt)
{
	free_percpu(slwt->pcpu_counters);
}

/* per-attribute operations: parse from netlink, dump to netlink, compare
 * two tunnel states, and (optionally) release parse-time resources
 */
struct seg6_action_param {
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

	/* optional destroy() callback useful for releasing resources which
	 * have been previously acquired in the corresponding parse()
	 * function.
	 */
	void (*destroy)(struct seg6_local_lwt *slwt);
};

/* dispatch table: one entry per SEG6_LOCAL_* attribute, indexed by the
 * attribute ID
 */
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_SRH]	= { .parse = parse_nla_srh,
				    .put = put_nla_srh,
				    .cmp = cmp_nla_srh,
				    .destroy = destroy_attr_srh },

	[SEG6_LOCAL_TABLE]	= { .parse = parse_nla_table,
				    .put = put_nla_table,
				    .cmp = cmp_nla_table },

	[SEG6_LOCAL_NH4]	= { .parse = parse_nla_nh4,
				    .put = put_nla_nh4,
				    .cmp = cmp_nla_nh4 },

	[SEG6_LOCAL_NH6]	= { .parse = parse_nla_nh6,
				    .put = put_nla_nh6,
				    .cmp = cmp_nla_nh6 },

	[SEG6_LOCAL_IIF]	= { .parse = parse_nla_iif,
				    .put = put_nla_iif,
				    .cmp = cmp_nla_iif },

	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
				    .put = put_nla_oif,
				    .cmp = cmp_nla_oif },

	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
				    .put = put_nla_bpf,
				    .cmp = cmp_nla_bpf,
				    .destroy = destroy_attr_bpf },

	[SEG6_LOCAL_VRFTABLE]	= { .parse = parse_nla_vrftable,
				    .put = put_nla_vrftable,
				    .cmp = cmp_nla_vrftable },

	[SEG6_LOCAL_COUNTERS]	= { .parse = parse_nla_counters,
				    .put = put_nla_counters,
				    .cmp = cmp_nla_counters,
				    .destroy = destroy_attr_counters },
};

/* call the destroy() callback (if available) for each set attribute in
 * @parsed_attrs, starting from the first attribute up to the @max_parsed
 * (excluded) attribute.
 */
static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
			    struct seg6_local_lwt *slwt)
{
	struct seg6_action_param *param;
	int i;

	/* Every required seg6local attribute is identified by an ID which is
	 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
	 *
	 * We scan the 'parsed_attrs' bitmask, starting from the first attribute
	 * up to the @max_parsed (excluded) attribute.
	 * For each set attribute, we retrieve the corresponding destroy()
	 * callback. If the callback is not available, then we skip to the next
	 * attribute; otherwise, we call the destroy() callback.
	 */
	for (i = 0; i < max_parsed; ++i) {
		if (!(parsed_attrs & SEG6_F_ATTR(i)))
			continue;

		param = &seg6_action_params[i];

		if (param->destroy)
			param->destroy(slwt);
	}
}

/* release all the resources that may have been acquired during parsing
 * operations.
 */
static void destroy_attrs(struct seg6_local_lwt *slwt)
{
	unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;

	__destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
}

/* parse every optional attribute supported by the behavior that the user
 * actually supplied; on failure, roll back the already-parsed optional
 * attributes so no resource leaks.
 */
static int parse_nla_optional_attrs(struct nlattr **attrs,
				    struct seg6_local_lwt *slwt)
{
	struct seg6_action_desc *desc = slwt->desc;
	unsigned long parsed_optattrs = 0;
	struct seg6_action_param *param;
	int err, i;

	for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
		if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
			continue;

		/* once here, the i-th attribute is provided by the
		 * userspace AND it is identified optional as well.
		 */
		param = &seg6_action_params[i];

		err = param->parse(attrs, slwt);
		if (err < 0)
			goto parse_optattrs_err;

		/* current attribute has been correctly parsed */
		parsed_optattrs |= SEG6_F_ATTR(i);
	}

	/* store in the tunnel state all the optional attributed successfully
	 * parsed.
	 */
	slwt->parsed_optattrs = parsed_optattrs;

	return 0;

parse_optattrs_err:
	__destroy_attrs(parsed_optattrs, i, slwt);

	return err;
}

/* call the custom constructor of the behavior during its initialization phase
 * and after that all its attributes have been parsed successfully.
 */
static int
seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
				struct netlink_ext_ack *extack)
{
	struct seg6_action_desc *desc = slwt->desc;
	struct seg6_local_lwtunnel_ops *ops;

	ops = &desc->slwt_ops;
	if (!ops->build_state)
		return 0;

	return ops->build_state(slwt, cfg, extack);
}

/* call the custom destructor of the behavior which is invoked before the
 * tunnel is going to be destroyed.
 */
static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
{
	struct seg6_action_desc *desc = slwt->desc;
	struct seg6_local_lwtunnel_ops *ops;

	ops = &desc->slwt_ops;
	if (!ops->destroy_state)
		return;

	ops->destroy_state(slwt);
}

/* resolve the behavior descriptor for slwt->action and parse its required
 * attributes (all mandatory) followed by its optional ones.
 */
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	struct seg6_action_param *param;
	struct seg6_action_desc *desc;
	unsigned long invalid_attrs;
	int i, err;

	desc = __get_action_desc(slwt->action);
	if (!desc)
		return -EINVAL;

	if (!desc->input)
		return -EOPNOTSUPP;

	slwt->desc = desc;
	slwt->headroom += desc->static_headroom;

	/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
	 * disjoined, this allow us to release acquired resources by optional
	 * attributes and by required attributes independently from each other
	 * without any interference.
	 * In other terms, we are sure that we do not release some the acquired
	 * resources twice.
	 *
	 * Note that if an attribute is configured both as required and as
	 * optional, it means that the user has messed something up in the
	 * seg6_action_table. Therefore, this check is required for SRv6
	 * behaviors to work properly.
1765 */ 1766 invalid_attrs = desc->attrs & desc->optattrs; 1767 if (invalid_attrs) { 1768 WARN_ONCE(1, 1769 "An attribute cannot be both required AND optional"); 1770 return -EINVAL; 1771 } 1772 1773 /* parse the required attributes */ 1774 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1775 if (desc->attrs & SEG6_F_ATTR(i)) { 1776 if (!attrs[i]) 1777 return -EINVAL; 1778 1779 param = &seg6_action_params[i]; 1780 1781 err = param->parse(attrs, slwt); 1782 if (err < 0) 1783 goto parse_attrs_err; 1784 } 1785 } 1786 1787 /* parse the optional attributes, if any */ 1788 err = parse_nla_optional_attrs(attrs, slwt); 1789 if (err < 0) 1790 goto parse_attrs_err; 1791 1792 return 0; 1793 1794 parse_attrs_err: 1795 /* release any resource that may have been acquired during the i-1 1796 * parse() operations. 1797 */ 1798 __destroy_attrs(desc->attrs, i, slwt); 1799 1800 return err; 1801 } 1802 1803 static int seg6_local_build_state(struct net *net, struct nlattr *nla, 1804 unsigned int family, const void *cfg, 1805 struct lwtunnel_state **ts, 1806 struct netlink_ext_ack *extack) 1807 { 1808 struct nlattr *tb[SEG6_LOCAL_MAX + 1]; 1809 struct lwtunnel_state *newts; 1810 struct seg6_local_lwt *slwt; 1811 int err; 1812 1813 if (family != AF_INET6) 1814 return -EINVAL; 1815 1816 err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla, 1817 seg6_local_policy, extack); 1818 1819 if (err < 0) 1820 return err; 1821 1822 if (!tb[SEG6_LOCAL_ACTION]) 1823 return -EINVAL; 1824 1825 newts = lwtunnel_state_alloc(sizeof(*slwt)); 1826 if (!newts) 1827 return -ENOMEM; 1828 1829 slwt = seg6_local_lwtunnel(newts); 1830 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]); 1831 1832 err = parse_nla_action(tb, slwt); 1833 if (err < 0) 1834 goto out_free; 1835 1836 err = seg6_local_lwtunnel_build_state(slwt, cfg, extack); 1837 if (err < 0) 1838 goto out_destroy_attrs; 1839 1840 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; 1841 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; 1842 newts->headroom = 
slwt->headroom; 1843 1844 *ts = newts; 1845 1846 return 0; 1847 1848 out_destroy_attrs: 1849 destroy_attrs(slwt); 1850 out_free: 1851 kfree(newts); 1852 return err; 1853 } 1854 1855 static void seg6_local_destroy_state(struct lwtunnel_state *lwt) 1856 { 1857 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1858 1859 seg6_local_lwtunnel_destroy_state(slwt); 1860 1861 destroy_attrs(slwt); 1862 1863 return; 1864 } 1865 1866 static int seg6_local_fill_encap(struct sk_buff *skb, 1867 struct lwtunnel_state *lwt) 1868 { 1869 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1870 struct seg6_action_param *param; 1871 unsigned long attrs; 1872 int i, err; 1873 1874 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) 1875 return -EMSGSIZE; 1876 1877 attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1878 1879 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1880 if (attrs & SEG6_F_ATTR(i)) { 1881 param = &seg6_action_params[i]; 1882 err = param->put(skb, slwt); 1883 if (err < 0) 1884 return err; 1885 } 1886 } 1887 1888 return 0; 1889 } 1890 1891 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) 1892 { 1893 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1894 unsigned long attrs; 1895 int nlsize; 1896 1897 nlsize = nla_total_size(4); /* action */ 1898 1899 attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1900 1901 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH)) 1902 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); 1903 1904 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)) 1905 nlsize += nla_total_size(4); 1906 1907 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4)) 1908 nlsize += nla_total_size(4); 1909 1910 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6)) 1911 nlsize += nla_total_size(16); 1912 1913 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF)) 1914 nlsize += nla_total_size(4); 1915 1916 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF)) 1917 nlsize += nla_total_size(4); 1918 1919 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF)) 1920 nlsize += nla_total_size(sizeof(struct nlattr)) + 
		       nla_total_size(MAX_PROG_NAME) +
		       nla_total_size(4);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
		nlsize += nla_total_size(4);

	/* NOTE(review): SEG6_F_LOCAL_COUNTERS is presumably defined earlier
	 * in this file as SEG6_F_ATTR(SEG6_LOCAL_COUNTERS) — confirm, since
	 * its definition is outside this chunk.
	 */
	if (attrs & SEG6_F_LOCAL_COUNTERS)
		nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
			  /* SEG6_LOCAL_CNT_PACKETS */
			  nla_total_size_64bit(sizeof(__u64)) +
			  /* SEG6_LOCAL_CNT_BYTES */
			  nla_total_size_64bit(sizeof(__u64)) +
			  /* SEG6_LOCAL_CNT_ERRORS */
			  nla_total_size_64bit(sizeof(__u64));

	return nlsize;
}

/* lwtunnel cmp_encap callback: returns 0 when the two tunnel states are
 * equivalent (same action, same attribute set, each attribute comparing
 * equal), non-zero otherwise.
 */
static int seg6_local_cmp_encap(struct lwtunnel_state *a,
				struct lwtunnel_state *b)
{
	struct seg6_local_lwt *slwt_a, *slwt_b;
	struct seg6_action_param *param;
	unsigned long attrs_a, attrs_b;
	int i;

	slwt_a = seg6_local_lwtunnel(a);
	slwt_b = seg6_local_lwtunnel(b);

	if (slwt_a->action != slwt_b->action)
		return 1;

	attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
	attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;

	if (attrs_a != attrs_b)
		return 1;

	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
		if (attrs_a & SEG6_F_ATTR(i)) {
			param = &seg6_action_params[i];
			if (param->cmp(slwt_a, slwt_b))
				return 1;
		}
	}

	return 0;
}

static const struct lwtunnel_encap_ops seg6_local_ops = {
	.build_state	= seg6_local_build_state,
	.destroy_state	= seg6_local_destroy_state,
	.input		= seg6_local_input,
	.fill_encap	= seg6_local_fill_encap,
	.get_encap_size	= seg6_local_get_encap_size,
	.cmp_encap	= seg6_local_cmp_encap,
	.owner		= THIS_MODULE,
};

int __init seg6_local_init(void)
{
	/* If the max total number of defined attributes is reached, then your
	 * kernel build stops here.
	 *
	 * This check is required to avoid arithmetic overflows when processing
	 * behavior attributes and the maximum number of defined attributes
	 * exceeds the allowed value.
	 */
	BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));

	return lwtunnel_encap_add_ops(&seg6_local_ops,
				      LWTUNNEL_ENCAP_SEG6_LOCAL);
}

void seg6_local_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
}