1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * SR-IPv6 implementation 4 * 5 * Author: 6 * David Lebrun <david.lebrun@uclouvain.be> 7 */ 8 9 #include <linux/types.h> 10 #include <linux/skbuff.h> 11 #include <linux/net.h> 12 #include <linux/module.h> 13 #include <net/ip.h> 14 #include <net/ip_tunnels.h> 15 #include <net/lwtunnel.h> 16 #include <net/netevent.h> 17 #include <net/netns/generic.h> 18 #include <net/ip6_fib.h> 19 #include <net/route.h> 20 #include <net/seg6.h> 21 #include <linux/seg6.h> 22 #include <linux/seg6_iptunnel.h> 23 #include <net/addrconf.h> 24 #include <net/ip6_route.h> 25 #include <net/dst_cache.h> 26 #ifdef CONFIG_IPV6_SEG6_HMAC 27 #include <net/seg6_hmac.h> 28 #endif 29 #include <linux/netfilter.h> 30 31 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) 32 { 33 int head = 0; 34 35 switch (tuninfo->mode) { 36 case SEG6_IPTUN_MODE_INLINE: 37 break; 38 case SEG6_IPTUN_MODE_ENCAP: 39 head = sizeof(struct ipv6hdr); 40 break; 41 case SEG6_IPTUN_MODE_L2ENCAP: 42 return 0; 43 } 44 45 return ((tuninfo->srh->hdrlen + 1) << 3) + head; 46 } 47 48 struct seg6_lwt { 49 struct dst_cache cache; 50 struct seg6_iptunnel_encap tuninfo[]; 51 }; 52 53 static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) 54 { 55 return (struct seg6_lwt *)lwt->data; 56 } 57 58 static inline struct seg6_iptunnel_encap * 59 seg6_encap_lwtunnel(struct lwtunnel_state *lwt) 60 { 61 return seg6_lwt_lwtunnel(lwt)->tuninfo; 62 } 63 64 static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { 65 [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, 66 }; 67 68 static int nla_put_srh(struct sk_buff *skb, int attrtype, 69 struct seg6_iptunnel_encap *tuninfo) 70 { 71 struct seg6_iptunnel_encap *data; 72 struct nlattr *nla; 73 int len; 74 75 len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); 76 77 nla = nla_reserve(skb, attrtype, len); 78 if (!nla) 79 return -EMSGSIZE; 80 81 data = nla_data(nla); 82 memcpy(data, tuninfo, len); 83 84 return 0; 85 } 86 87 static void set_tun_src(struct net *net, struct net_device *dev, 88 struct in6_addr *daddr, struct in6_addr *saddr) 89 { 90 struct seg6_pernet_data *sdata = seg6_pernet(net); 91 struct in6_addr *tun_src; 92 93 rcu_read_lock(); 94 95 tun_src = rcu_dereference(sdata->tun_src); 96 97 if (!ipv6_addr_any(tun_src)) { 98 memcpy(saddr, tun_src, sizeof(struct in6_addr)); 99 } else { 100 ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, 101 saddr); 102 } 103 104 rcu_read_unlock(); 105 } 106 107 /* Compute flowlabel for outer IPv6 header */ 108 static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, 109 struct ipv6hdr *inner_hdr) 110 { 111 int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel; 112 __be32 flowlabel = 0; 113 u32 hash; 114 115 if (do_flowlabel > 0) { 116 hash = skb_get_hash(skb); 117 hash = rol32(hash, 16); 118 flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; 119 } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) { 120 flowlabel = ip6_flowlabel(inner_hdr); 121 } 122 return flowlabel; 123 } 124 125 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ 126 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) 127 { 128 struct dst_entry *dst = skb_dst(skb); 129 struct net *net = dev_net(dst->dev); 130 struct ipv6hdr *hdr, *inner_hdr; 131 struct ipv6_sr_hdr *isrh; 132 int hdrlen, tot_len, err; 133 __be32 flowlabel; 134 135 hdrlen = (osrh->hdrlen + 1) << 3; 136 tot_len = hdrlen + sizeof(*hdr); 137 138 err = skb_cow_head(skb, tot_len + skb->mac_len); 139 if (unlikely(err)) 140 return err; 141 142 inner_hdr = ipv6_hdr(skb); 143 flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); 144 145 skb_push(skb, tot_len); 146 skb_reset_network_header(skb); 147 skb_mac_header_rebuild(skb); 148 hdr = ipv6_hdr(skb); 149 150 /* inherit tc, flowlabel and hlim 151 * hlim will be decremented in ip6_forward() afterwards and 152 * decapsulation will overwrite inner hlim with outer hlim 153 */ 154 155 if (skb->protocol == htons(ETH_P_IPV6)) { 156 ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), 157 flowlabel); 158 hdr->hop_limit = inner_hdr->hop_limit; 159 } else { 160 ip6_flow_hdr(hdr, 0, flowlabel); 161 hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); 162 163 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 164 165 /* the control block has been erased, so we have to set the 166 * iif once again. 167 * We read the receiving interface index directly from the 168 * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: 169 * ip_rcv_core(...)). 170 */ 171 IP6CB(skb)->iif = skb->skb_iif; 172 } 173 174 hdr->nexthdr = NEXTHDR_ROUTING; 175 176 isrh = (void *)hdr + sizeof(*hdr); 177 memcpy(isrh, osrh, hdrlen); 178 179 isrh->nexthdr = proto; 180 181 hdr->daddr = isrh->segments[isrh->first_segment]; 182 set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); 183 184 #ifdef CONFIG_IPV6_SEG6_HMAC 185 if (sr_has_hmac(isrh)) { 186 err = seg6_push_hmac(net, &hdr->saddr, isrh); 187 if (unlikely(err)) 188 return err; 189 } 190 #endif 191 192 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 193 194 skb_postpush_rcsum(skb, hdr, tot_len); 195 196 return 0; 197 } 198 EXPORT_SYMBOL_GPL(seg6_do_srh_encap); 199 200 /* insert an SRH within an IPv6 packet, just after the IPv6 header */ 201 int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) 202 { 203 struct ipv6hdr *hdr, *oldhdr; 204 struct ipv6_sr_hdr *isrh; 205 int hdrlen, err; 206 207 hdrlen = (osrh->hdrlen + 1) << 3; 208 209 err = skb_cow_head(skb, hdrlen + skb->mac_len); 210 if (unlikely(err)) 211 return err; 212 213 oldhdr = ipv6_hdr(skb); 214 215 skb_pull(skb, sizeof(struct ipv6hdr)); 216 skb_postpull_rcsum(skb, skb_network_header(skb), 217 sizeof(struct ipv6hdr)); 218 219 skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); 220 skb_reset_network_header(skb); 221 skb_mac_header_rebuild(skb); 222 223 hdr = ipv6_hdr(skb); 224 225 memmove(hdr, oldhdr, sizeof(*hdr)); 226 227 isrh = (void *)hdr + sizeof(*hdr); 228 memcpy(isrh, osrh, hdrlen); 229 230 isrh->nexthdr = hdr->nexthdr; 231 hdr->nexthdr = NEXTHDR_ROUTING; 232 233 isrh->segments[0] = hdr->daddr; 234 hdr->daddr = isrh->segments[isrh->first_segment]; 235 236 #ifdef CONFIG_IPV6_SEG6_HMAC 237 if (sr_has_hmac(isrh)) { 238 struct net *net = dev_net(skb_dst(skb)->dev); 239 240 err = seg6_push_hmac(net, &hdr->saddr, isrh); 241 if (unlikely(err)) 242 return err; 243 } 244 #endif 245 246 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 247 248 skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); 249 250 return 0; 251 } 252 EXPORT_SYMBOL_GPL(seg6_do_srh_inline); 253 254 static int seg6_do_srh(struct sk_buff *skb) 255 { 256 struct dst_entry *dst = skb_dst(skb); 257 struct seg6_iptunnel_encap *tinfo; 258 int proto, err = 0; 259 260 tinfo = seg6_encap_lwtunnel(dst->lwtstate); 261 262 switch (tinfo->mode) { 263 case SEG6_IPTUN_MODE_INLINE: 264 if (skb->protocol != htons(ETH_P_IPV6)) 265 return -EINVAL; 266 267 err = seg6_do_srh_inline(skb, tinfo->srh); 268 if (err) 269 return err; 270 break; 271 case SEG6_IPTUN_MODE_ENCAP: 272 err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6); 273 if (err) 274 return err; 275 276 if (skb->protocol == htons(ETH_P_IPV6)) 277 proto = IPPROTO_IPV6; 278 else if (skb->protocol == htons(ETH_P_IP)) 279 proto = IPPROTO_IPIP; 280 else 281 return -EINVAL; 282 283 err = seg6_do_srh_encap(skb, tinfo->srh, proto); 284 if (err) 285 return err; 286 287 skb_set_inner_transport_header(skb, skb_transport_offset(skb)); 288 skb_set_inner_protocol(skb, skb->protocol); 289 skb->protocol = htons(ETH_P_IPV6); 290 break; 291 case SEG6_IPTUN_MODE_L2ENCAP: 292 if (!skb_mac_header_was_set(skb)) 293 return -EINVAL; 294 295 if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0) 296 return -ENOMEM; 297 298 skb_mac_header_rebuild(skb); 299 skb_push(skb, skb->mac_len); 300 301 err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET); 302 if (err) 303 return err; 304 305 skb->protocol = htons(ETH_P_IPV6); 306 break; 307 } 308 309 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 310 nf_reset_ct(skb); 311 312 return 0; 313 } 314 315 static int seg6_input_finish(struct net *net, struct sock *sk, 316 struct sk_buff *skb) 317 { 318 return dst_input(skb); 319 } 320 321 static int seg6_input_core(struct net *net, struct sock *sk, 322 struct sk_buff *skb) 323 { 324 struct dst_entry *orig_dst = skb_dst(skb); 325 struct dst_entry *dst = NULL; 326 struct seg6_lwt *slwt; 327 int err; 328 329 err = seg6_do_srh(skb); 330 if (unlikely(err)) { 331 kfree_skb(skb); 332 return err; 333 } 334 335 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); 336 337 preempt_disable(); 338 dst = dst_cache_get(&slwt->cache); 339 preempt_enable(); 340 341 skb_dst_drop(skb); 342 343 if (!dst) { 344 ip6_route_input(skb); 345 dst = skb_dst(skb); 346 if (!dst->error) { 347 preempt_disable(); 348 dst_cache_set_ip6(&slwt->cache, dst, 349 &ipv6_hdr(skb)->saddr); 350 preempt_enable(); 351 } 352 } else { 353 skb_dst_set(skb, dst); 354 } 355 356 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 357 if (unlikely(err)) 358 return err; 359 360 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 361 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 362 dev_net(skb->dev), NULL, skb, NULL, 363 skb_dst(skb)->dev, seg6_input_finish); 364 365 return seg6_input_finish(dev_net(skb->dev), NULL, skb); 366 } 367 368 static int seg6_input_nf(struct sk_buff *skb) 369 { 370 struct net_device *dev = skb_dst(skb)->dev; 371 struct net *net = dev_net(skb->dev); 372 373 switch (skb->protocol) { 374 case htons(ETH_P_IP): 375 return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, 376 skb, NULL, dev, seg6_input_core); 377 case htons(ETH_P_IPV6): 378 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, 379 skb, NULL, dev, seg6_input_core); 380 } 381 382 return -EINVAL; 383 } 384 385 static int seg6_input(struct sk_buff *skb) 386 { 387 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 388 return seg6_input_nf(skb); 389 390 return seg6_input_core(dev_net(skb->dev), NULL, skb); 391 } 392 393 static int seg6_output_core(struct net *net, struct sock *sk, 394 struct sk_buff *skb) 395 { 396 struct dst_entry *orig_dst = skb_dst(skb); 397 struct dst_entry *dst = NULL; 398 struct seg6_lwt *slwt; 399 int err; 400 401 err = seg6_do_srh(skb); 402 if (unlikely(err)) 403 goto drop; 404 405 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); 406 407 preempt_disable(); 408 dst = dst_cache_get(&slwt->cache); 409 preempt_enable(); 410 411 if (unlikely(!dst)) { 412 struct ipv6hdr *hdr = ipv6_hdr(skb); 413 struct flowi6 fl6; 414 415 memset(&fl6, 0, sizeof(fl6)); 416 fl6.daddr = hdr->daddr; 417 fl6.saddr = hdr->saddr; 418 fl6.flowlabel = ip6_flowinfo(hdr); 419 fl6.flowi6_mark = skb->mark; 420 fl6.flowi6_proto = hdr->nexthdr; 421 422 dst = ip6_route_output(net, NULL, &fl6); 423 if (dst->error) { 424 err = dst->error; 425 dst_release(dst); 426 goto drop; 427 } 428 429 preempt_disable(); 430 dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); 431 preempt_enable(); 432 } 433 434 skb_dst_drop(skb); 435 skb_dst_set(skb, dst); 436 437 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 438 if (unlikely(err)) 439 goto drop; 440 441 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 442 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, 443 NULL, skb_dst(skb)->dev, dst_output); 444 445 return dst_output(net, sk, skb); 446 drop: 447 kfree_skb(skb); 448 return err; 449 } 450 451 static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) 452 { 453 struct net_device *dev = skb_dst(skb)->dev; 454 455 switch (skb->protocol) { 456 case htons(ETH_P_IP): 457 return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, 458 NULL, dev, seg6_output_core); 459 case htons(ETH_P_IPV6): 460 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, 461 NULL, dev, seg6_output_core); 462 } 463 464 return -EINVAL; 465 } 466 467 static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 468 { 469 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 470 return seg6_output_nf(net, sk, skb); 471 472 return seg6_output_core(net, sk, skb); 473 } 474 475 static int seg6_build_state(struct net *net, struct nlattr *nla, 476 unsigned int family, const void *cfg, 477 struct lwtunnel_state **ts, 478 struct netlink_ext_ack *extack) 479 { 480 struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; 481 struct seg6_iptunnel_encap *tuninfo; 482 struct lwtunnel_state *newts; 483 int tuninfo_len, min_size; 484 struct seg6_lwt *slwt; 485 int err; 486 487 if (family != AF_INET && family != AF_INET6) 488 return -EINVAL; 489 490 err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla, 491 seg6_iptunnel_policy, extack); 492 493 if (err < 0) 494 return err; 495 496 if (!tb[SEG6_IPTUNNEL_SRH]) 497 return -EINVAL; 498 499 tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); 500 tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); 501 502 /* tuninfo must contain at least the iptunnel encap structure, 503 * the SRH and one segment 504 */ 505 min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + 506 sizeof(struct in6_addr); 507 if (tuninfo_len < min_size) 508 return -EINVAL; 509 510 switch (tuninfo->mode) { 511 case SEG6_IPTUN_MODE_INLINE: 512 if (family != AF_INET6) 513 return -EINVAL; 514 515 break; 516 case SEG6_IPTUN_MODE_ENCAP: 517 break; 518 case SEG6_IPTUN_MODE_L2ENCAP: 519 break; 520 default: 521 return -EINVAL; 522 } 523 524 /* verify that SRH is consistent */ 525 if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false)) 526 return -EINVAL; 527 528 newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); 529 if (!newts) 530 return -ENOMEM; 531 532 slwt = seg6_lwt_lwtunnel(newts); 533 534 err = dst_cache_init(&slwt->cache, GFP_ATOMIC); 535 if (err) { 536 kfree(newts); 537 return err; 538 } 539 540 memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); 541 542 newts->type = LWTUNNEL_ENCAP_SEG6; 543 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; 544 545 if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) 546 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 547 548 newts->headroom = seg6_lwt_headroom(tuninfo); 549 550 *ts = newts; 551 552 return 0; 553 } 554 555 static void seg6_destroy_state(struct lwtunnel_state *lwt) 556 { 557 dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); 558 } 559 560 static int seg6_fill_encap_info(struct sk_buff *skb, 561 struct lwtunnel_state *lwtstate) 562 { 563 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 564 565 if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) 566 return -EMSGSIZE; 567 568 return 0; 569 } 570 571 static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) 572 { 573 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 574 575 return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); 576 } 577 578 static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 579 { 580 struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); 581 struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); 582 int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); 583 584 if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) 585 return 1; 586 587 return memcmp(a_hdr, b_hdr, len); 588 } 589 590 static const struct lwtunnel_encap_ops seg6_iptun_ops = { 591 .build_state = seg6_build_state, 592 .destroy_state = seg6_destroy_state, 593 .output = seg6_output, 594 .input = seg6_input, 595 .fill_encap = seg6_fill_encap_info, 596 .get_encap_size = seg6_encap_nlsize, 597 .cmp_encap = seg6_encap_cmp, 598 .owner = THIS_MODULE, 599 }; 600 601 int __init seg6_iptunnel_init(void) 602 { 603 return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); 604 } 605 606 void seg6_iptunnel_exit(void) 607 { 608 lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); 609 } 610