1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * SR-IPv6 implementation 4 * 5 * Author: 6 * David Lebrun <david.lebrun@uclouvain.be> 7 */ 8 9 #include <linux/types.h> 10 #include <linux/skbuff.h> 11 #include <linux/net.h> 12 #include <linux/module.h> 13 #include <net/ip.h> 14 #include <net/ip_tunnels.h> 15 #include <net/lwtunnel.h> 16 #include <net/netevent.h> 17 #include <net/netns/generic.h> 18 #include <net/ip6_fib.h> 19 #include <net/route.h> 20 #include <net/seg6.h> 21 #include <linux/seg6.h> 22 #include <linux/seg6_iptunnel.h> 23 #include <net/addrconf.h> 24 #include <net/ip6_route.h> 25 #include <net/dst_cache.h> 26 #ifdef CONFIG_IPV6_SEG6_HMAC 27 #include <net/seg6_hmac.h> 28 #endif 29 30 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) 31 { 32 int head = 0; 33 34 switch (tuninfo->mode) { 35 case SEG6_IPTUN_MODE_INLINE: 36 break; 37 case SEG6_IPTUN_MODE_ENCAP: 38 head = sizeof(struct ipv6hdr); 39 break; 40 case SEG6_IPTUN_MODE_L2ENCAP: 41 return 0; 42 } 43 44 return ((tuninfo->srh->hdrlen + 1) << 3) + head; 45 } 46 47 struct seg6_lwt { 48 struct dst_cache cache; 49 struct seg6_iptunnel_encap tuninfo[]; 50 }; 51 52 static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) 53 { 54 return (struct seg6_lwt *)lwt->data; 55 } 56 57 static inline struct seg6_iptunnel_encap * 58 seg6_encap_lwtunnel(struct lwtunnel_state *lwt) 59 { 60 return seg6_lwt_lwtunnel(lwt)->tuninfo; 61 } 62 63 static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { 64 [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, 65 }; 66 67 static int nla_put_srh(struct sk_buff *skb, int attrtype, 68 struct seg6_iptunnel_encap *tuninfo) 69 { 70 struct seg6_iptunnel_encap *data; 71 struct nlattr *nla; 72 int len; 73 74 len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); 75 76 nla = nla_reserve(skb, attrtype, len); 77 if (!nla) 78 return -EMSGSIZE; 79 80 data = nla_data(nla); 81 memcpy(data, tuninfo, len); 82 83 return 0; 84 } 85 86 static void set_tun_src(struct net *net, struct net_device *dev, 87 struct in6_addr *daddr, struct in6_addr *saddr) 88 { 89 struct seg6_pernet_data *sdata = seg6_pernet(net); 90 struct in6_addr *tun_src; 91 92 rcu_read_lock(); 93 94 tun_src = rcu_dereference(sdata->tun_src); 95 96 if (!ipv6_addr_any(tun_src)) { 97 memcpy(saddr, tun_src, sizeof(struct in6_addr)); 98 } else { 99 ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, 100 saddr); 101 } 102 103 rcu_read_unlock(); 104 } 105 106 /* Compute flowlabel for outer IPv6 header */ 107 static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, 108 struct ipv6hdr *inner_hdr) 109 { 110 int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel; 111 __be32 flowlabel = 0; 112 u32 hash; 113 114 if (do_flowlabel > 0) { 115 hash = skb_get_hash(skb); 116 hash = rol32(hash, 16); 117 flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; 118 } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) { 119 flowlabel = ip6_flowlabel(inner_hdr); 120 } 121 return flowlabel; 122 } 123 124 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ 125 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) 126 { 127 struct dst_entry *dst = skb_dst(skb); 128 struct net *net = dev_net(dst->dev); 129 struct ipv6hdr *hdr, *inner_hdr; 130 struct ipv6_sr_hdr *isrh; 131 int hdrlen, tot_len, err; 132 __be32 flowlabel; 133 134 hdrlen = (osrh->hdrlen + 1) << 3; 135 tot_len = hdrlen + sizeof(*hdr); 136 137 err = skb_cow_head(skb, tot_len + skb->mac_len); 138 if (unlikely(err)) 139 return err; 140 141 inner_hdr = ipv6_hdr(skb); 142 flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); 143 144 skb_push(skb, tot_len); 145 skb_reset_network_header(skb); 146 skb_mac_header_rebuild(skb); 147 hdr = ipv6_hdr(skb); 148 149 /* inherit tc, flowlabel and hlim 150 * hlim will be decremented in ip6_forward() afterwards and 151 * decapsulation will overwrite inner hlim with outer hlim 152 */ 153 154 if (skb->protocol == htons(ETH_P_IPV6)) { 155 ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), 156 flowlabel); 157 hdr->hop_limit = inner_hdr->hop_limit; 158 } else { 159 ip6_flow_hdr(hdr, 0, flowlabel); 160 hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); 161 162 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 163 } 164 165 hdr->nexthdr = NEXTHDR_ROUTING; 166 167 isrh = (void *)hdr + sizeof(*hdr); 168 memcpy(isrh, osrh, hdrlen); 169 170 isrh->nexthdr = proto; 171 172 hdr->daddr = isrh->segments[isrh->first_segment]; 173 set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); 174 175 #ifdef CONFIG_IPV6_SEG6_HMAC 176 if (sr_has_hmac(isrh)) { 177 err = seg6_push_hmac(net, &hdr->saddr, isrh); 178 if (unlikely(err)) 179 return err; 180 } 181 #endif 182 183 skb_postpush_rcsum(skb, hdr, tot_len); 184 185 return 0; 186 } 187 EXPORT_SYMBOL_GPL(seg6_do_srh_encap); 188 189 /* insert an SRH within an IPv6 packet, just after the IPv6 header */ 190 int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) 191 { 192 struct ipv6hdr *hdr, *oldhdr; 193 struct ipv6_sr_hdr *isrh; 194 int hdrlen, err; 195 196 hdrlen = (osrh->hdrlen + 1) << 3; 197 198 err = skb_cow_head(skb, hdrlen + skb->mac_len); 199 if (unlikely(err)) 200 return err; 201 202 oldhdr = ipv6_hdr(skb); 203 204 skb_pull(skb, sizeof(struct ipv6hdr)); 205 skb_postpull_rcsum(skb, skb_network_header(skb), 206 sizeof(struct ipv6hdr)); 207 208 skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); 209 skb_reset_network_header(skb); 210 skb_mac_header_rebuild(skb); 211 212 hdr = ipv6_hdr(skb); 213 214 memmove(hdr, oldhdr, sizeof(*hdr)); 215 216 isrh = (void *)hdr + sizeof(*hdr); 217 memcpy(isrh, osrh, hdrlen); 218 219 isrh->nexthdr = hdr->nexthdr; 220 hdr->nexthdr = NEXTHDR_ROUTING; 221 222 isrh->segments[0] = hdr->daddr; 223 hdr->daddr = isrh->segments[isrh->first_segment]; 224 225 #ifdef CONFIG_IPV6_SEG6_HMAC 226 if (sr_has_hmac(isrh)) { 227 struct net *net = dev_net(skb_dst(skb)->dev); 228 229 err = seg6_push_hmac(net, &hdr->saddr, isrh); 230 if (unlikely(err)) 231 return err; 232 } 233 #endif 234 235 skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); 236 237 return 0; 238 } 239 EXPORT_SYMBOL_GPL(seg6_do_srh_inline); 240 241 static int seg6_do_srh(struct sk_buff *skb) 242 { 243 struct dst_entry *dst = skb_dst(skb); 244 struct seg6_iptunnel_encap *tinfo; 245 int proto, err = 0; 246 247 tinfo = seg6_encap_lwtunnel(dst->lwtstate); 248 249 switch (tinfo->mode) { 250 case SEG6_IPTUN_MODE_INLINE: 251 if (skb->protocol != htons(ETH_P_IPV6)) 252 return -EINVAL; 253 254 err = seg6_do_srh_inline(skb, tinfo->srh); 255 if (err) 256 return err; 257 break; 258 case SEG6_IPTUN_MODE_ENCAP: 259 err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6); 260 if (err) 261 return err; 262 263 if (skb->protocol == htons(ETH_P_IPV6)) 264 proto = IPPROTO_IPV6; 265 else if (skb->protocol == htons(ETH_P_IP)) 266 proto = IPPROTO_IPIP; 267 else 268 return -EINVAL; 269 270 err = seg6_do_srh_encap(skb, tinfo->srh, proto); 271 if (err) 272 return err; 273 274 skb_set_inner_transport_header(skb, skb_transport_offset(skb)); 275 skb_set_inner_protocol(skb, skb->protocol); 276 skb->protocol = htons(ETH_P_IPV6); 277 break; 278 case SEG6_IPTUN_MODE_L2ENCAP: 279 if (!skb_mac_header_was_set(skb)) 280 return -EINVAL; 281 282 if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0) 283 return -ENOMEM; 284 285 skb_mac_header_rebuild(skb); 286 skb_push(skb, skb->mac_len); 287 288 err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET); 289 if (err) 290 return err; 291 292 skb->protocol = htons(ETH_P_IPV6); 293 break; 294 } 295 296 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 297 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 298 299 return 0; 300 } 301 302 static int seg6_input(struct sk_buff *skb) 303 { 304 struct dst_entry *orig_dst = skb_dst(skb); 305 struct dst_entry *dst = NULL; 306 struct seg6_lwt *slwt; 307 int err; 308 309 err = seg6_do_srh(skb); 310 if (unlikely(err)) { 311 kfree_skb(skb); 312 return err; 313 } 314 315 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); 316 317 preempt_disable(); 318 dst = dst_cache_get(&slwt->cache); 319 preempt_enable(); 320 321 skb_dst_drop(skb); 322 323 if (!dst) { 324 ip6_route_input(skb); 325 dst = skb_dst(skb); 326 if (!dst->error) { 327 preempt_disable(); 328 dst_cache_set_ip6(&slwt->cache, dst, 329 &ipv6_hdr(skb)->saddr); 330 preempt_enable(); 331 } 332 } else { 333 skb_dst_set(skb, dst); 334 } 335 336 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 337 if (unlikely(err)) 338 return err; 339 340 return dst_input(skb); 341 } 342 343 static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 344 { 345 struct dst_entry *orig_dst = skb_dst(skb); 346 struct dst_entry *dst = NULL; 347 struct seg6_lwt *slwt; 348 int err = -EINVAL; 349 350 err = seg6_do_srh(skb); 351 if (unlikely(err)) 352 goto drop; 353 354 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); 355 356 preempt_disable(); 357 dst = dst_cache_get(&slwt->cache); 358 preempt_enable(); 359 360 if (unlikely(!dst)) { 361 struct ipv6hdr *hdr = ipv6_hdr(skb); 362 struct flowi6 fl6; 363 364 memset(&fl6, 0, sizeof(fl6)); 365 fl6.daddr = hdr->daddr; 366 fl6.saddr = hdr->saddr; 367 fl6.flowlabel = ip6_flowinfo(hdr); 368 fl6.flowi6_mark = skb->mark; 369 fl6.flowi6_proto = hdr->nexthdr; 370 371 dst = ip6_route_output(net, NULL, &fl6); 372 if (dst->error) { 373 err = dst->error; 374 dst_release(dst); 375 goto drop; 376 } 377 378 preempt_disable(); 379 dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); 380 preempt_enable(); 381 } 382 383 skb_dst_drop(skb); 384 skb_dst_set(skb, dst); 385 386 err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); 387 if (unlikely(err)) 388 goto drop; 389 390 return dst_output(net, sk, skb); 391 drop: 392 kfree_skb(skb); 393 return err; 394 } 395 396 static int seg6_build_state(struct net *net, struct nlattr *nla, 397 unsigned int family, const void *cfg, 398 struct lwtunnel_state **ts, 399 struct netlink_ext_ack *extack) 400 { 401 struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; 402 struct seg6_iptunnel_encap *tuninfo; 403 struct lwtunnel_state *newts; 404 int tuninfo_len, min_size; 405 struct seg6_lwt *slwt; 406 int err; 407 408 if (family != AF_INET && family != AF_INET6) 409 return -EINVAL; 410 411 err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla, 412 seg6_iptunnel_policy, extack); 413 414 if (err < 0) 415 return err; 416 417 if (!tb[SEG6_IPTUNNEL_SRH]) 418 return -EINVAL; 419 420 tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); 421 tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); 422 423 /* tuninfo must contain at least the iptunnel encap structure, 424 * the SRH and one segment 425 */ 426 min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + 427 sizeof(struct in6_addr); 428 if (tuninfo_len < min_size) 429 return -EINVAL; 430 431 switch (tuninfo->mode) { 432 case SEG6_IPTUN_MODE_INLINE: 433 if (family != AF_INET6) 434 return -EINVAL; 435 436 break; 437 case SEG6_IPTUN_MODE_ENCAP: 438 break; 439 case SEG6_IPTUN_MODE_L2ENCAP: 440 break; 441 default: 442 return -EINVAL; 443 } 444 445 /* verify that SRH is consistent */ 446 if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false)) 447 return -EINVAL; 448 449 newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); 450 if (!newts) 451 return -ENOMEM; 452 453 slwt = seg6_lwt_lwtunnel(newts); 454 455 err = dst_cache_init(&slwt->cache, GFP_ATOMIC); 456 if (err) { 457 kfree(newts); 458 return err; 459 } 460 461 memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); 462 463 newts->type = LWTUNNEL_ENCAP_SEG6; 464 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; 465 466 if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) 467 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 468 469 newts->headroom = seg6_lwt_headroom(tuninfo); 470 471 *ts = newts; 472 473 return 0; 474 } 475 476 static void seg6_destroy_state(struct lwtunnel_state *lwt) 477 { 478 dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); 479 } 480 481 static int seg6_fill_encap_info(struct sk_buff *skb, 482 struct lwtunnel_state *lwtstate) 483 { 484 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 485 486 if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) 487 return -EMSGSIZE; 488 489 return 0; 490 } 491 492 static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) 493 { 494 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 495 496 return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); 497 } 498 499 static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 500 { 501 struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); 502 struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); 503 int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); 504 505 if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) 506 return 1; 507 508 return memcmp(a_hdr, b_hdr, len); 509 } 510 511 static const struct lwtunnel_encap_ops seg6_iptun_ops = { 512 .build_state = seg6_build_state, 513 .destroy_state = seg6_destroy_state, 514 .output = seg6_output, 515 .input = seg6_input, 516 .fill_encap = seg6_fill_encap_info, 517 .get_encap_size = seg6_encap_nlsize, 518 .cmp_encap = seg6_encap_cmp, 519 .owner = THIS_MODULE, 520 }; 521 522 int __init seg6_iptunnel_init(void) 523 { 524 return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); 525 } 526 527 void seg6_iptunnel_exit(void) 528 { 529 lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); 530 } 531