1 // SPDX-License-Identifier: GPL-2.0 2 3 /* In-place tunneling */ 4 5 #include <stdbool.h> 6 #include <string.h> 7 8 #include <linux/stddef.h> 9 #include <linux/bpf.h> 10 #include <linux/if_ether.h> 11 #include <linux/in.h> 12 #include <linux/ip.h> 13 #include <linux/ipv6.h> 14 #include <linux/mpls.h> 15 #include <linux/tcp.h> 16 #include <linux/udp.h> 17 #include <linux/pkt_cls.h> 18 #include <linux/types.h> 19 20 #include <bpf/bpf_endian.h> 21 #include <bpf/bpf_helpers.h> 22 23 static const int cfg_port = 8000; 24 25 static const int cfg_udp_src = 20000; 26 27 #define UDP_PORT 5555 28 #define MPLS_OVER_UDP_PORT 6635 29 #define ETH_OVER_UDP_PORT 7777 30 31 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ 32 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | 33 MPLS_LS_S_MASK | 0xff); 34 35 struct gre_hdr { 36 __be16 flags; 37 __be16 protocol; 38 } __attribute__((packed)); 39 40 union l4hdr { 41 struct udphdr udp; 42 struct gre_hdr gre; 43 }; 44 45 struct v4hdr { 46 struct iphdr ip; 47 union l4hdr l4hdr; 48 __u8 pad[16]; /* enough space for L2 header */ 49 } __attribute__((packed)); 50 51 struct v6hdr { 52 struct ipv6hdr ip; 53 union l4hdr l4hdr; 54 __u8 pad[16]; /* enough space for L2 header */ 55 } __attribute__((packed)); 56 57 static __always_inline void set_ipv4_csum(struct iphdr *iph) 58 { 59 __u16 *iph16 = (__u16 *)iph; 60 __u32 csum; 61 int i; 62 63 iph->check = 0; 64 65 #pragma clang loop unroll(full) 66 for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) 67 csum += *iph16++; 68 69 iph->check = ~((csum & 0xffff) + (csum >> 16)); 70 } 71 72 static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, 73 __u16 l2_proto) 74 { 75 __u16 udp_dst = UDP_PORT; 76 struct iphdr iph_inner; 77 struct v4hdr h_outer; 78 struct tcphdr tcph; 79 int olen, l2_len; 80 int tcp_off; 81 __u64 flags; 82 83 /* Most tests encapsulate a packet into a tunnel with the same 84 * network protocol, and derive the outer header fields from 85 * the inner header. 86 * 87 * The 6in4 case tests different inner and outer protocols. As 88 * the inner is ipv6, but the outer expects an ipv4 header as 89 * input, manually build a struct iphdr based on the ipv6hdr. 90 */ 91 if (encap_proto == IPPROTO_IPV6) { 92 const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1; 93 const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2; 94 struct ipv6hdr iph6_inner; 95 96 /* Read the IPv6 header */ 97 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner, 98 sizeof(iph6_inner)) < 0) 99 return TC_ACT_OK; 100 101 /* Derive the IPv4 header fields from the IPv6 header */ 102 memset(&iph_inner, 0, sizeof(iph_inner)); 103 iph_inner.version = 4; 104 iph_inner.ihl = 5; 105 iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) + 106 bpf_ntohs(iph6_inner.payload_len)); 107 iph_inner.ttl = iph6_inner.hop_limit - 1; 108 iph_inner.protocol = iph6_inner.nexthdr; 109 iph_inner.saddr = __bpf_constant_htonl(saddr); 110 iph_inner.daddr = __bpf_constant_htonl(daddr); 111 112 tcp_off = sizeof(iph6_inner); 113 } else { 114 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, 115 sizeof(iph_inner)) < 0) 116 return TC_ACT_OK; 117 118 tcp_off = sizeof(iph_inner); 119 } 120 121 /* filter only packets we want */ 122 if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP) 123 return TC_ACT_OK; 124 125 if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off, 126 &tcph, sizeof(tcph)) < 0) 127 return TC_ACT_OK; 128 129 if (tcph.dest != __bpf_constant_htons(cfg_port)) 130 return TC_ACT_OK; 131 132 olen = sizeof(h_outer.ip); 133 l2_len = 0; 134 135 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4; 136 137 switch (l2_proto) { 138 case ETH_P_MPLS_UC: 139 l2_len = sizeof(mpls_label); 140 udp_dst = MPLS_OVER_UDP_PORT; 141 break; 142 case ETH_P_TEB: 143 l2_len = ETH_HLEN; 144 udp_dst = ETH_OVER_UDP_PORT; 145 break; 146 } 147 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); 148 149 switch (encap_proto) { 150 case IPPROTO_GRE: 151 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; 152 olen += sizeof(h_outer.l4hdr.gre); 153 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); 154 h_outer.l4hdr.gre.flags = 0; 155 break; 156 case IPPROTO_UDP: 157 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; 158 olen += sizeof(h_outer.l4hdr.udp); 159 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); 160 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); 161 h_outer.l4hdr.udp.check = 0; 162 h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) + 163 sizeof(h_outer.l4hdr.udp) + 164 l2_len); 165 break; 166 case IPPROTO_IPIP: 167 case IPPROTO_IPV6: 168 break; 169 default: 170 return TC_ACT_OK; 171 } 172 173 /* add L2 encap (if specified) */ 174 switch (l2_proto) { 175 case ETH_P_MPLS_UC: 176 *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; 177 break; 178 case ETH_P_TEB: 179 if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, 180 ETH_HLEN)) 181 return TC_ACT_SHOT; 182 break; 183 } 184 olen += l2_len; 185 186 /* add room between mac and network header */ 187 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) 188 return TC_ACT_SHOT; 189 190 /* prepare new outer network header */ 191 h_outer.ip = iph_inner; 192 h_outer.ip.tot_len = bpf_htons(olen + 193 bpf_ntohs(h_outer.ip.tot_len)); 194 h_outer.ip.protocol = encap_proto; 195 196 set_ipv4_csum((void *)&h_outer.ip); 197 198 /* store new outer network header */ 199 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, 200 BPF_F_INVALIDATE_HASH) < 0) 201 return TC_ACT_SHOT; 202 203 /* if changing outer proto type, update eth->h_proto */ 204 if (encap_proto == IPPROTO_IPV6) { 205 struct ethhdr eth; 206 207 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0) 208 return TC_ACT_SHOT; 209 eth.h_proto = bpf_htons(ETH_P_IP); 210 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0) 211 return TC_ACT_SHOT; 212 } 213 214 return TC_ACT_OK; 215 } 216 217 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, 218 __u16 l2_proto) 219 { 220 __u16 udp_dst = UDP_PORT; 221 struct ipv6hdr iph_inner; 222 struct v6hdr h_outer; 223 struct tcphdr tcph; 224 int olen, l2_len; 225 __u16 tot_len; 226 __u64 flags; 227 228 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, 229 sizeof(iph_inner)) < 0) 230 return TC_ACT_OK; 231 232 /* filter only packets we want */ 233 if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), 234 &tcph, sizeof(tcph)) < 0) 235 return TC_ACT_OK; 236 237 if (tcph.dest != __bpf_constant_htons(cfg_port)) 238 return TC_ACT_OK; 239 240 olen = sizeof(h_outer.ip); 241 l2_len = 0; 242 243 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6; 244 245 switch (l2_proto) { 246 case ETH_P_MPLS_UC: 247 l2_len = sizeof(mpls_label); 248 udp_dst = MPLS_OVER_UDP_PORT; 249 break; 250 case ETH_P_TEB: 251 l2_len = ETH_HLEN; 252 udp_dst = ETH_OVER_UDP_PORT; 253 break; 254 } 255 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); 256 257 switch (encap_proto) { 258 case IPPROTO_GRE: 259 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; 260 olen += sizeof(h_outer.l4hdr.gre); 261 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); 262 h_outer.l4hdr.gre.flags = 0; 263 break; 264 case IPPROTO_UDP: 265 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; 266 olen += sizeof(h_outer.l4hdr.udp); 267 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); 268 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); 269 tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) + 270 sizeof(h_outer.l4hdr.udp); 271 h_outer.l4hdr.udp.check = 0; 272 h_outer.l4hdr.udp.len = bpf_htons(tot_len); 273 break; 274 case IPPROTO_IPV6: 275 break; 276 default: 277 return TC_ACT_OK; 278 } 279 280 /* add L2 encap (if specified) */ 281 switch (l2_proto) { 282 case ETH_P_MPLS_UC: 283 *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; 284 break; 285 case ETH_P_TEB: 286 if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, 287 ETH_HLEN)) 288 return TC_ACT_SHOT; 289 break; 290 } 291 olen += l2_len; 292 293 /* add room between mac and network header */ 294 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) 295 return TC_ACT_SHOT; 296 297 /* prepare new outer network header */ 298 h_outer.ip = iph_inner; 299 h_outer.ip.payload_len = bpf_htons(olen + 300 bpf_ntohs(h_outer.ip.payload_len)); 301 302 h_outer.ip.nexthdr = encap_proto; 303 304 /* store new outer network header */ 305 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, 306 BPF_F_INVALIDATE_HASH) < 0) 307 return TC_ACT_SHOT; 308 309 return TC_ACT_OK; 310 } 311 312 SEC("encap_ipip_none") 313 int __encap_ipip_none(struct __sk_buff *skb) 314 { 315 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 316 return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP); 317 else 318 return TC_ACT_OK; 319 } 320 321 SEC("encap_gre_none") 322 int __encap_gre_none(struct __sk_buff *skb) 323 { 324 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 325 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP); 326 else 327 return TC_ACT_OK; 328 } 329 330 SEC("encap_gre_mpls") 331 int __encap_gre_mpls(struct __sk_buff *skb) 332 { 333 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 334 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC); 335 else 336 return TC_ACT_OK; 337 } 338 339 SEC("encap_gre_eth") 340 int __encap_gre_eth(struct __sk_buff *skb) 341 { 342 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 343 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB); 344 else 345 return TC_ACT_OK; 346 } 347 348 SEC("encap_udp_none") 349 int __encap_udp_none(struct __sk_buff *skb) 350 { 351 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 352 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP); 353 else 354 return TC_ACT_OK; 355 } 356 357 SEC("encap_udp_mpls") 358 int __encap_udp_mpls(struct __sk_buff *skb) 359 { 360 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 361 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC); 362 else 363 return TC_ACT_OK; 364 } 365 366 SEC("encap_udp_eth") 367 int __encap_udp_eth(struct __sk_buff *skb) 368 { 369 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 370 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB); 371 else 372 return TC_ACT_OK; 373 } 374 375 SEC("encap_sit_none") 376 int __encap_sit_none(struct __sk_buff *skb) 377 { 378 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 379 return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP); 380 else 381 return TC_ACT_OK; 382 } 383 384 SEC("encap_ip6tnl_none") 385 int __encap_ip6tnl_none(struct __sk_buff *skb) 386 { 387 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 388 return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6); 389 else 390 return TC_ACT_OK; 391 } 392 393 SEC("encap_ip6gre_none") 394 int __encap_ip6gre_none(struct __sk_buff *skb) 395 { 396 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 397 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6); 398 else 399 return TC_ACT_OK; 400 } 401 402 SEC("encap_ip6gre_mpls") 403 int __encap_ip6gre_mpls(struct __sk_buff *skb) 404 { 405 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 406 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC); 407 else 408 return TC_ACT_OK; 409 } 410 411 SEC("encap_ip6gre_eth") 412 int __encap_ip6gre_eth(struct __sk_buff *skb) 413 { 414 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 415 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB); 416 else 417 return TC_ACT_OK; 418 } 419 420 SEC("encap_ip6udp_none") 421 int __encap_ip6udp_none(struct __sk_buff *skb) 422 { 423 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 424 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6); 425 else 426 return TC_ACT_OK; 427 } 428 429 SEC("encap_ip6udp_mpls") 430 int __encap_ip6udp_mpls(struct __sk_buff *skb) 431 { 432 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 433 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC); 434 else 435 return TC_ACT_OK; 436 } 437 438 SEC("encap_ip6udp_eth") 439 int __encap_ip6udp_eth(struct __sk_buff *skb) 440 { 441 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 442 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB); 443 else 444 return TC_ACT_OK; 445 } 446 447 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) 448 { 449 char buf[sizeof(struct v6hdr)]; 450 struct gre_hdr greh; 451 struct udphdr udph; 452 int olen = len; 453 454 switch (proto) { 455 case IPPROTO_IPIP: 456 case IPPROTO_IPV6: 457 break; 458 case IPPROTO_GRE: 459 olen += sizeof(struct gre_hdr); 460 if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0) 461 return TC_ACT_OK; 462 switch (bpf_ntohs(greh.protocol)) { 463 case ETH_P_MPLS_UC: 464 olen += sizeof(mpls_label); 465 break; 466 case ETH_P_TEB: 467 olen += ETH_HLEN; 468 break; 469 } 470 break; 471 case IPPROTO_UDP: 472 olen += sizeof(struct udphdr); 473 if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0) 474 return TC_ACT_OK; 475 switch (bpf_ntohs(udph.dest)) { 476 case MPLS_OVER_UDP_PORT: 477 olen += sizeof(mpls_label); 478 break; 479 case ETH_OVER_UDP_PORT: 480 olen += ETH_HLEN; 481 break; 482 } 483 break; 484 default: 485 return TC_ACT_OK; 486 } 487 488 if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, 489 BPF_F_ADJ_ROOM_FIXED_GSO)) 490 return TC_ACT_SHOT; 491 492 return TC_ACT_OK; 493 } 494 495 static int decap_ipv4(struct __sk_buff *skb) 496 { 497 struct iphdr iph_outer; 498 499 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, 500 sizeof(iph_outer)) < 0) 501 return TC_ACT_OK; 502 503 if (iph_outer.ihl != 5) 504 return TC_ACT_OK; 505 506 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), 507 iph_outer.protocol); 508 } 509 510 static int decap_ipv6(struct __sk_buff *skb) 511 { 512 struct ipv6hdr iph_outer; 513 514 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, 515 sizeof(iph_outer)) < 0) 516 return TC_ACT_OK; 517 518 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), 519 iph_outer.nexthdr); 520 } 521 522 SEC("decap") 523 int decap_f(struct __sk_buff *skb) 524 { 525 switch (skb->protocol) { 526 case __bpf_constant_htons(ETH_P_IP): 527 return decap_ipv4(skb); 528 case __bpf_constant_htons(ETH_P_IPV6): 529 return decap_ipv6(skb); 530 default: 531 /* does not match, ignore */ 532 return TC_ACT_OK; 533 } 534 } 535 536 char __license[] SEC("license") = "GPL"; 537