1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * SR-IPv6 implementation 4 * 5 * Author: 6 * David Lebrun <david.lebrun@uclouvain.be> 7 */ 8 9 #include <linux/types.h> 10 #include <linux/skbuff.h> 11 #include <linux/net.h> 12 #include <linux/module.h> 13 #include <net/ip.h> 14 #include <net/ip_tunnels.h> 15 #include <net/lwtunnel.h> 16 #include <net/netevent.h> 17 #include <net/netns/generic.h> 18 #include <net/ip6_fib.h> 19 #include <net/route.h> 20 #include <net/seg6.h> 21 #include <linux/seg6.h> 22 #include <linux/seg6_iptunnel.h> 23 #include <net/addrconf.h> 24 #include <net/ip6_route.h> 25 #include <net/dst_cache.h> 26 #ifdef CONFIG_IPV6_SEG6_HMAC 27 #include <net/seg6_hmac.h> 28 #endif 29 #include <linux/netfilter.h> 30 31 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) 32 { 33 int head = 0; 34 35 switch (tuninfo->mode) { 36 case SEG6_IPTUN_MODE_INLINE: 37 break; 38 case SEG6_IPTUN_MODE_ENCAP: 39 head = sizeof(struct ipv6hdr); 40 break; 41 case SEG6_IPTUN_MODE_L2ENCAP: 42 return 0; 43 } 44 45 return ((tuninfo->srh->hdrlen + 1) << 3) + head; 46 } 47 48 struct seg6_lwt { 49 struct dst_cache cache; 50 struct seg6_iptunnel_encap tuninfo[]; 51 }; 52 53 static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) 54 { 55 return (struct seg6_lwt *)lwt->data; 56 } 57 58 static inline struct seg6_iptunnel_encap * 59 seg6_encap_lwtunnel(struct lwtunnel_state *lwt) 60 { 61 return seg6_lwt_lwtunnel(lwt)->tuninfo; 62 } 63 64 static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { 65 [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, 66 }; 67 68 static int nla_put_srh(struct sk_buff *skb, int attrtype, 69 struct seg6_iptunnel_encap *tuninfo) 70 { 71 struct seg6_iptunnel_encap *data; 72 struct nlattr *nla; 73 int len; 74 75 len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); 76 77 nla = nla_reserve(skb, attrtype, len); 78 if (!nla) 79 return -EMSGSIZE; 80 81 data = nla_data(nla); 82 memcpy(data, tuninfo, len); 83 84 return 0; 85 } 86 87 static void set_tun_src(struct net *net, struct net_device *dev, 88 struct in6_addr *daddr, struct in6_addr *saddr) 89 { 90 struct seg6_pernet_data *sdata = seg6_pernet(net); 91 struct in6_addr *tun_src; 92 93 rcu_read_lock(); 94 95 tun_src = rcu_dereference(sdata->tun_src); 96 97 if (!ipv6_addr_any(tun_src)) { 98 memcpy(saddr, tun_src, sizeof(struct in6_addr)); 99 } else { 100 ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, 101 saddr); 102 } 103 104 rcu_read_unlock(); 105 } 106 107 /* Compute flowlabel for outer IPv6 header */ 108 static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, 109 struct ipv6hdr *inner_hdr) 110 { 111 int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel; 112 __be32 flowlabel = 0; 113 u32 hash; 114 115 if (do_flowlabel > 0) { 116 hash = skb_get_hash(skb); 117 hash = rol32(hash, 16); 118 flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; 119 } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) { 120 flowlabel = ip6_flowlabel(inner_hdr); 121 } 122 return flowlabel; 123 } 124 125 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ 126 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) 127 { 128 struct dst_entry *dst = skb_dst(skb); 129 struct net *net = dev_net(dst->dev); 130 struct ipv6hdr *hdr, *inner_hdr; 131 struct ipv6_sr_hdr *isrh; 132 int hdrlen, tot_len, err; 133 __be32 flowlabel; 134 135 hdrlen = (osrh->hdrlen + 1) << 3; 136 tot_len = hdrlen + sizeof(*hdr); 137 138 err = skb_cow_head(skb, tot_len + skb->mac_len); 139 if (unlikely(err)) 140 return err; 141 142 inner_hdr = ipv6_hdr(skb); 143 flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); 144 145 skb_push(skb, tot_len); 146 skb_reset_network_header(skb); 147 skb_mac_header_rebuild(skb); 148 hdr = ipv6_hdr(skb); 149 150 /* inherit tc, flowlabel and hlim 151 * hlim will be decremented in ip6_forward() afterwards and 152 * decapsulation will overwrite inner hlim with outer hlim 153 */ 154 155 if (skb->protocol == htons(ETH_P_IPV6)) { 156 ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), 157 flowlabel); 158 hdr->hop_limit = inner_hdr->hop_limit; 159 } else { 160 ip6_flow_hdr(hdr, 0, flowlabel); 161 hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); 162 163 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 164 165 /* the control block has been erased, so we have to set the 166 * iif once again. 167 * We read the receiving interface index directly from the 168 * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: 169 * ip_rcv_core(...)). 170 */ 171 IP6CB(skb)->iif = skb->skb_iif; 172 } 173 174 hdr->nexthdr = NEXTHDR_ROUTING; 175 176 isrh = (void *)hdr + sizeof(*hdr); 177 memcpy(isrh, osrh, hdrlen); 178 179 isrh->nexthdr = proto; 180 181 hdr->daddr = isrh->segments[isrh->first_segment]; 182 set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); 183 184 #ifdef CONFIG_IPV6_SEG6_HMAC 185 if (sr_has_hmac(isrh)) { 186 err = seg6_push_hmac(net, &hdr->saddr, isrh); 187 if (unlikely(err)) 188 return err; 189 } 190 #endif 191 192 skb_postpush_rcsum(skb, hdr, tot_len); 193 194 return 0; 195 } 196 EXPORT_SYMBOL_GPL(seg6_do_srh_encap); 197 198 /* insert an SRH within an IPv6 packet, just after the IPv6 header */ 199 int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) 200 { 201 struct ipv6hdr *hdr, *oldhdr; 202 struct ipv6_sr_hdr *isrh; 203 int hdrlen, err; 204 205 hdrlen = (osrh->hdrlen + 1) << 3; 206 207 err = skb_cow_head(skb, hdrlen + skb->mac_len); 208 if (unlikely(err)) 209 return err; 210 211 oldhdr = ipv6_hdr(skb); 212 213 skb_pull(skb, sizeof(struct ipv6hdr)); 214 skb_postpull_rcsum(skb, skb_network_header(skb), 215 sizeof(struct ipv6hdr)); 216 217 skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); 218 skb_reset_network_header(skb); 219 skb_mac_header_rebuild(skb); 220 221 hdr = ipv6_hdr(skb); 222 223 memmove(hdr, oldhdr, sizeof(*hdr)); 224 225 isrh = (void *)hdr + sizeof(*hdr); 226 memcpy(isrh, osrh, hdrlen); 227 228 isrh->nexthdr = hdr->nexthdr; 229 hdr->nexthdr = NEXTHDR_ROUTING; 230 231 isrh->segments[0] = hdr->daddr; 232 hdr->daddr = isrh->segments[isrh->first_segment]; 233 234 #ifdef CONFIG_IPV6_SEG6_HMAC 235 if (sr_has_hmac(isrh)) { 236 struct net *net = dev_net(skb_dst(skb)->dev); 237 238 err = seg6_push_hmac(net, &hdr->saddr, isrh); 239 if (unlikely(err)) 240 return err; 241 } 242 #endif 243 244 skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); 245 246 return 0; 247 } 248 EXPORT_SYMBOL_GPL(seg6_do_srh_inline); 249 250 static int seg6_do_srh(struct sk_buff *skb) 251 { 252 struct dst_entry *dst = skb_dst(skb); 253 struct seg6_iptunnel_encap *tinfo; 254 int proto, err = 0; 255 256 tinfo = seg6_encap_lwtunnel(dst->lwtstate); 257 258 switch (tinfo->mode) { 259 case SEG6_IPTUN_MODE_INLINE: 260 if (skb->protocol != htons(ETH_P_IPV6)) 261 return -EINVAL; 262 263 err = seg6_do_srh_inline(skb, tinfo->srh); 264 if (err) 265 return err; 266 break; 267 case SEG6_IPTUN_MODE_ENCAP: 268 err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6); 269 if (err) 270 return err; 271 272 if (skb->protocol == htons(ETH_P_IPV6)) 273 proto = IPPROTO_IPV6; 274 else if (skb->protocol == htons(ETH_P_IP)) 275 proto = IPPROTO_IPIP; 276 else 277 return -EINVAL; 278 279 err = seg6_do_srh_encap(skb, tinfo->srh, proto); 280 if (err) 281 return err; 282 283 skb_set_inner_transport_header(skb, skb_transport_offset(skb)); 284 skb_set_inner_protocol(skb, skb->protocol); 285 skb->protocol = htons(ETH_P_IPV6); 286 break; 287 case SEG6_IPTUN_MODE_L2ENCAP: 288 if (!skb_mac_header_was_set(skb)) 289 return -EINVAL; 290 291 if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0) 292 return -ENOMEM; 293 294 skb_mac_header_rebuild(skb); 295 skb_push(skb, skb->mac_len); 296 297 err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET); 298 if (err) 299 return err; 300 301 skb->protocol = htons(ETH_P_IPV6); 302 break; 303 } 304 305 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 306 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 307 nf_reset_ct(skb); 308 309 return 0; 310 } 311 312 static int seg6_input_finish(struct net *net, struct sock *sk, 313 struct sk_buff *skb) 314 { 315 return dst_input(skb); 316 } 317 318 static int seg6_input_core(struct net *net, struct sock *sk, 319 struct sk_buff *skb) 320 { 321 struct dst_entry *orig_dst = skb_dst(skb); 322 struct dst_entry *dst = NULL; 323 struct seg6_lwt *slwt; 324 int err; 325 326 err = seg6_do_srh(skb); 327 if (unlikely(err)) { 328 kfree_skb(skb); 329 return err; 330 } 331 332 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); 333 334 preempt_disable(); 335 dst = dst_cache_get(&slwt->cache); 336 preempt_enable(); 337 338 skb_dst_drop(skb); 339 340 if (!dst) { 341 ip6_route_input(skb); 342 dst = skb_dst(skb); 343 if (!dst->error) { 344 preempt_disable(); 345 dst_cache_set_ip6(&slwt->cache, dst, 346 &ipv6_hdr(skb)->saddr); 347 preempt_enable(); 348 } 349 } else { 350 skb_dst_set(skb, dst); 351 } 352 353 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 354 if (unlikely(err)) 355 return err; 356 357 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 358 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 359 dev_net(skb->dev), NULL, skb, NULL, 360 skb_dst(skb)->dev, seg6_input_finish); 361 362 return seg6_input_finish(dev_net(skb->dev), NULL, skb); 363 } 364 365 static int seg6_input_nf(struct sk_buff *skb) 366 { 367 struct net_device *dev = skb_dst(skb)->dev; 368 struct net *net = dev_net(skb->dev); 369 370 switch (skb->protocol) { 371 case htons(ETH_P_IP): 372 return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, 373 skb, NULL, dev, seg6_input_core); 374 case htons(ETH_P_IPV6): 375 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, 376 skb, NULL, dev, seg6_input_core); 377 } 378 379 return -EINVAL; 380 } 381 382 static int seg6_input(struct sk_buff *skb) 383 { 384 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 385 return seg6_input_nf(skb); 386 387 return seg6_input_core(dev_net(skb->dev), NULL, skb); 388 } 389 390 static int seg6_output_core(struct net *net, struct sock *sk, 391 struct sk_buff *skb) 392 { 393 struct dst_entry *orig_dst = skb_dst(skb); 394 struct dst_entry *dst = NULL; 395 struct seg6_lwt *slwt; 396 int err; 397 398 err = seg6_do_srh(skb); 399 if (unlikely(err)) 400 goto drop; 401 402 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); 403 404 preempt_disable(); 405 dst = dst_cache_get(&slwt->cache); 406 preempt_enable(); 407 408 if (unlikely(!dst)) { 409 struct ipv6hdr *hdr = ipv6_hdr(skb); 410 struct flowi6 fl6; 411 412 memset(&fl6, 0, sizeof(fl6)); 413 fl6.daddr = hdr->daddr; 414 fl6.saddr = hdr->saddr; 415 fl6.flowlabel = ip6_flowinfo(hdr); 416 fl6.flowi6_mark = skb->mark; 417 fl6.flowi6_proto = hdr->nexthdr; 418 419 dst = ip6_route_output(net, NULL, &fl6); 420 if (dst->error) { 421 err = dst->error; 422 dst_release(dst); 423 goto drop; 424 } 425 426 preempt_disable(); 427 dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); 428 preempt_enable(); 429 } 430 431 skb_dst_drop(skb); 432 skb_dst_set(skb, dst); 433 434 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 435 if (unlikely(err)) 436 goto drop; 437 438 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 439 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, 440 NULL, skb_dst(skb)->dev, dst_output); 441 442 return dst_output(net, sk, skb); 443 drop: 444 kfree_skb(skb); 445 return err; 446 } 447 448 static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) 449 { 450 struct net_device *dev = skb_dst(skb)->dev; 451 452 switch (skb->protocol) { 453 case htons(ETH_P_IP): 454 return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, 455 NULL, dev, seg6_output_core); 456 case htons(ETH_P_IPV6): 457 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, 458 NULL, dev, seg6_output_core); 459 } 460 461 return -EINVAL; 462 } 463 464 static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 465 { 466 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 467 return seg6_output_nf(net, sk, skb); 468 469 return seg6_output_core(net, sk, skb); 470 } 471 472 static int seg6_build_state(struct net *net, struct nlattr *nla, 473 unsigned int family, const void *cfg, 474 struct lwtunnel_state **ts, 475 struct netlink_ext_ack *extack) 476 { 477 struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; 478 struct seg6_iptunnel_encap *tuninfo; 479 struct lwtunnel_state *newts; 480 int tuninfo_len, min_size; 481 struct seg6_lwt *slwt; 482 int err; 483 484 if (family != AF_INET && family != AF_INET6) 485 return -EINVAL; 486 487 err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla, 488 seg6_iptunnel_policy, extack); 489 490 if (err < 0) 491 return err; 492 493 if (!tb[SEG6_IPTUNNEL_SRH]) 494 return -EINVAL; 495 496 tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); 497 tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); 498 499 /* tuninfo must contain at least the iptunnel encap structure, 500 * the SRH and one segment 501 */ 502 min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + 503 sizeof(struct in6_addr); 504 if (tuninfo_len < min_size) 505 return -EINVAL; 506 507 switch (tuninfo->mode) { 508 case SEG6_IPTUN_MODE_INLINE: 509 if (family != AF_INET6) 510 return -EINVAL; 511 512 break; 513 case SEG6_IPTUN_MODE_ENCAP: 514 break; 515 case SEG6_IPTUN_MODE_L2ENCAP: 516 break; 517 default: 518 return -EINVAL; 519 } 520 521 /* verify that SRH is consistent */ 522 if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false)) 523 return -EINVAL; 524 525 newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); 526 if (!newts) 527 return -ENOMEM; 528 529 slwt = seg6_lwt_lwtunnel(newts); 530 531 err = dst_cache_init(&slwt->cache, GFP_ATOMIC); 532 if (err) { 533 kfree(newts); 534 return err; 535 } 536 537 memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); 538 539 newts->type = LWTUNNEL_ENCAP_SEG6; 540 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; 541 542 if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) 543 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 544 545 newts->headroom = seg6_lwt_headroom(tuninfo); 546 547 *ts = newts; 548 549 return 0; 550 } 551 552 static void seg6_destroy_state(struct lwtunnel_state *lwt) 553 { 554 dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); 555 } 556 557 static int seg6_fill_encap_info(struct sk_buff *skb, 558 struct lwtunnel_state *lwtstate) 559 { 560 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 561 562 if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) 563 return -EMSGSIZE; 564 565 return 0; 566 } 567 568 static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) 569 { 570 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 571 572 return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); 573 } 574 575 static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 576 { 577 struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); 578 struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); 579 int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); 580 581 if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) 582 return 1; 583 584 return memcmp(a_hdr, b_hdr, len); 585 } 586 587 static const struct lwtunnel_encap_ops seg6_iptun_ops = { 588 .build_state = seg6_build_state, 589 .destroy_state = seg6_destroy_state, 590 .output = seg6_output, 591 .input = seg6_input, 592 .fill_encap = seg6_fill_encap_info, 593 .get_encap_size = seg6_encap_nlsize, 594 .cmp_encap = seg6_encap_cmp, 595 .owner = THIS_MODULE, 596 }; 597 598 int __init seg6_iptunnel_init(void) 599 { 600 return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); 601 } 602 603 void seg6_iptunnel_exit(void) 604 { 605 lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); 606 } 607