xref: /openbmc/linux/tools/testing/selftests/bpf/progs/test_tc_tunnel.c (revision 248ed9e227e6cf59acb1aaf3aa30d530a0232c1a)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* In-place tunneling */
4 
5 #include <stdbool.h>
6 #include <string.h>
7 
8 #include <linux/stddef.h>
9 #include <linux/bpf.h>
10 #include <linux/if_ether.h>
11 #include <linux/in.h>
12 #include <linux/ip.h>
13 #include <linux/ipv6.h>
14 #include <linux/mpls.h>
15 #include <linux/tcp.h>
16 #include <linux/udp.h>
17 #include <linux/pkt_cls.h>
18 #include <linux/types.h>
19 
20 #include <bpf/bpf_endian.h>
21 #include <bpf/bpf_helpers.h>
22 
/* TCP destination port the encap programs match on (compared with tcph.dest). */
23 static const int cfg_port = 8000;
24 
/* Source port written into the outer UDP header. */
25 static const int cfg_udp_src = 20000;
26 
/* Bytes reserved after the L4 header in struct v4hdr / struct v6hdr: room for
 * a vxlan header followed by an Ethernet header.
 */
27 #define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
28 
/* Outer UDP destination ports. The encap functions pick one according to the
 * L2 encapsulation in use; decap_internal() switches on the same values to
 * decide how many extra bytes follow the UDP header.
 */
29 #define	UDP_PORT		5555
30 #define	MPLS_OVER_UDP_PORT	6635
31 #define	ETH_OVER_UDP_PORT	7777
32 #define	VXLAN_UDP_PORT		8472
33 
/* Bit tested in the ext_proto argument of __encap_ipv4() / __encap_ipv6() to
 * request a vxlan header in front of the copied Ethernet header.
 */
34 #define	EXTPROTO_VXLAN	0x1
35 
/* Values used to fill struct vxlanhdr. VXLAN_VNI_MASK is the 24-bit id mask
 * shifted left by 8 and then passed through bpf_htonl(), so it is expressed
 * in network byte order.
 */
36 #define	VXLAN_N_VID     (1u << 24)
37 #define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
38 #define	VXLAN_FLAGS     0x8
39 #define	VXLAN_VNI       1
40 
/* Next-header value that decap_internal() treats as an IPv6 option header;
 * only defined here when the included headers do not already provide it.
 */
41 #ifndef NEXTHDR_DEST
42 #define NEXTHDR_DEST	60
43 #endif
44 
45 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
46 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
47 						     MPLS_LS_S_MASK | 0xff);
48 
/* Eight-byte vxlan header written in front of the copied Ethernet header when
 * EXTPROTO_VXLAN is requested. Both fields are declared big-endian.
 */
49 struct vxlanhdr {
50 	__be32 vx_flags;
51 	__be32 vx_vni;
52 } __attribute__((packed));
53 
/* Four-byte GRE header: the encap code writes flags as 0 and protocol as the
 * big-endian l2_proto; decap_internal() reads protocol to size the L2 encap.
 */
54 struct gre_hdr {
55 	__be16 flags;
56 	__be16 protocol;
57 } __attribute__((packed));
58 
/* Outer L4 header slot: holds either a UDP or a GRE header, depending on the
 * encap_proto passed to the encap functions.
 */
59 union l4hdr {
60 	struct udphdr udp;
61 	struct gre_hdr gre;
62 };
63 
/* Stack buffer in which __encap_ipv4() assembles the outer headers before
 * writing the first olen bytes of it with one bpf_skb_store_bytes() call:
 * IPv4 header, then the L4 slot, then the optional L2 encapsulation.
 * The L2 part is placed at byte offset olen, i.e. directly after the L4
 * header actually used, not necessarily at the start of pad[].
 */
64 struct v4hdr {
65 	struct iphdr ip;
66 	union l4hdr l4hdr;
67 	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
68 } __attribute__((packed));
69 
/* IPv6 counterpart of struct v4hdr: outer IPv6 header, L4 slot and room for
 * the optional L2 encapsulation. Used by __encap_ipv6() and, for its ip
 * member only, by encap_ipv6_ipip6().
 */
70 struct v6hdr {
71 	struct ipv6hdr ip;
72 	union l4hdr l4hdr;
73 	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
74 } __attribute__((packed));
75 
/* Compute the IPv4 header checksum of @iph and store it in iph->check.
 *
 * Only the fixed sizeof(struct iphdr) bytes are summed (no IP options), as
 * native 16-bit words with the check field zeroed first. The carries are
 * folded back into the low 16 bits and the one's complement is stored.
 */
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum = 0;
	int i;

	iph->check = 0;

#pragma clang loop unroll(full)
	for (i = 0; i < sizeof(*iph) >> 1; i++)
		csum += iph16[i];

	/* Fold twice: the first fold can itself carry out of 16 bits
	 * (e.g. 0x1ffff -> 0x10000), which a single fold would lose.
	 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);

	iph->check = (__u16)~csum;
}
90 
91 static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
92 					__u16 l2_proto, __u16 ext_proto)
93 {
94 	__u16 udp_dst = UDP_PORT;
95 	struct iphdr iph_inner;
96 	struct v4hdr h_outer;
97 	struct tcphdr tcph;
98 	int olen, l2_len;
99 	__u8 *l2_hdr = NULL;
100 	int tcp_off;
101 	__u64 flags;
102 
103 	/* Most tests encapsulate a packet into a tunnel with the same
104 	 * network protocol, and derive the outer header fields from
105 	 * the inner header.
106 	 *
107 	 * The 6in4 case tests different inner and outer protocols. As
108 	 * the inner is ipv6, but the outer expects an ipv4 header as
109 	 * input, manually build a struct iphdr based on the ipv6hdr.
110 	 */
111 	if (encap_proto == IPPROTO_IPV6) {
112 		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
113 		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
114 		struct ipv6hdr iph6_inner;
115 
116 		/* Read the IPv6 header */
117 		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
118 				       sizeof(iph6_inner)) < 0)
119 			return TC_ACT_OK;
120 
121 		/* Derive the IPv4 header fields from the IPv6 header */
122 		memset(&iph_inner, 0, sizeof(iph_inner));
123 		iph_inner.version = 4;
124 		iph_inner.ihl = 5;
125 		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
126 				    bpf_ntohs(iph6_inner.payload_len));
127 		iph_inner.ttl = iph6_inner.hop_limit - 1;
128 		iph_inner.protocol = iph6_inner.nexthdr;
129 		iph_inner.saddr = __bpf_constant_htonl(saddr);
130 		iph_inner.daddr = __bpf_constant_htonl(daddr);
131 
132 		tcp_off = sizeof(iph6_inner);
133 	} else {
134 		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
135 				       sizeof(iph_inner)) < 0)
136 			return TC_ACT_OK;
137 
138 		tcp_off = sizeof(iph_inner);
139 	}
140 
141 	/* filter only packets we want */
142 	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
143 		return TC_ACT_OK;
144 
145 	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
146 			       &tcph, sizeof(tcph)) < 0)
147 		return TC_ACT_OK;
148 
149 	if (tcph.dest != __bpf_constant_htons(cfg_port))
150 		return TC_ACT_OK;
151 
152 	olen = sizeof(h_outer.ip);
153 	l2_len = 0;
154 
155 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
156 
157 	switch (l2_proto) {
158 	case ETH_P_MPLS_UC:
159 		l2_len = sizeof(mpls_label);
160 		udp_dst = MPLS_OVER_UDP_PORT;
161 		break;
162 	case ETH_P_TEB:
163 		l2_len = ETH_HLEN;
164 		if (ext_proto & EXTPROTO_VXLAN) {
165 			udp_dst = VXLAN_UDP_PORT;
166 			l2_len += sizeof(struct vxlanhdr);
167 		} else
168 			udp_dst = ETH_OVER_UDP_PORT;
169 		break;
170 	}
171 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
172 
173 	switch (encap_proto) {
174 	case IPPROTO_GRE:
175 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
176 		olen += sizeof(h_outer.l4hdr.gre);
177 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
178 		h_outer.l4hdr.gre.flags = 0;
179 		break;
180 	case IPPROTO_UDP:
181 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
182 		olen += sizeof(h_outer.l4hdr.udp);
183 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
184 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
185 		h_outer.l4hdr.udp.check = 0;
186 		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
187 						  sizeof(h_outer.l4hdr.udp) +
188 						  l2_len);
189 		break;
190 	case IPPROTO_IPIP:
191 	case IPPROTO_IPV6:
192 		break;
193 	default:
194 		return TC_ACT_OK;
195 	}
196 
197 	/* add L2 encap (if specified) */
198 	l2_hdr = (__u8 *)&h_outer + olen;
199 	switch (l2_proto) {
200 	case ETH_P_MPLS_UC:
201 		*(__u32 *)l2_hdr = mpls_label;
202 		break;
203 	case ETH_P_TEB:
204 		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
205 
206 		if (ext_proto & EXTPROTO_VXLAN) {
207 			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
208 
209 			vxlan_hdr->vx_flags = VXLAN_FLAGS;
210 			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
211 
212 			l2_hdr += sizeof(struct vxlanhdr);
213 		}
214 
215 		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
216 			return TC_ACT_SHOT;
217 
218 		break;
219 	}
220 	olen += l2_len;
221 
222 	/* add room between mac and network header */
223 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
224 		return TC_ACT_SHOT;
225 
226 	/* prepare new outer network header */
227 	h_outer.ip = iph_inner;
228 	h_outer.ip.tot_len = bpf_htons(olen +
229 				       bpf_ntohs(h_outer.ip.tot_len));
230 	h_outer.ip.protocol = encap_proto;
231 
232 	set_ipv4_csum((void *)&h_outer.ip);
233 
234 	/* store new outer network header */
235 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
236 				BPF_F_INVALIDATE_HASH) < 0)
237 		return TC_ACT_SHOT;
238 
239 	/* if changing outer proto type, update eth->h_proto */
240 	if (encap_proto == IPPROTO_IPV6) {
241 		struct ethhdr eth;
242 
243 		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
244 			return TC_ACT_SHOT;
245 		eth.h_proto = bpf_htons(ETH_P_IP);
246 		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
247 			return TC_ACT_SHOT;
248 	}
249 
250 	return TC_ACT_OK;
251 }
252 
253 static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
254 				      __u16 l2_proto)
255 {
256 	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
257 }
258 
259 static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
260 					__u16 l2_proto, __u16 ext_proto)
261 {
262 	__u16 udp_dst = UDP_PORT;
263 	struct ipv6hdr iph_inner;
264 	struct v6hdr h_outer;
265 	struct tcphdr tcph;
266 	int olen, l2_len;
267 	__u8 *l2_hdr = NULL;
268 	__u16 tot_len;
269 	__u64 flags;
270 
271 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
272 			       sizeof(iph_inner)) < 0)
273 		return TC_ACT_OK;
274 
275 	/* filter only packets we want */
276 	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
277 			       &tcph, sizeof(tcph)) < 0)
278 		return TC_ACT_OK;
279 
280 	if (tcph.dest != __bpf_constant_htons(cfg_port))
281 		return TC_ACT_OK;
282 
283 	olen = sizeof(h_outer.ip);
284 	l2_len = 0;
285 
286 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
287 
288 	switch (l2_proto) {
289 	case ETH_P_MPLS_UC:
290 		l2_len = sizeof(mpls_label);
291 		udp_dst = MPLS_OVER_UDP_PORT;
292 		break;
293 	case ETH_P_TEB:
294 		l2_len = ETH_HLEN;
295 		if (ext_proto & EXTPROTO_VXLAN) {
296 			udp_dst = VXLAN_UDP_PORT;
297 			l2_len += sizeof(struct vxlanhdr);
298 		} else
299 			udp_dst = ETH_OVER_UDP_PORT;
300 		break;
301 	}
302 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
303 
304 	switch (encap_proto) {
305 	case IPPROTO_GRE:
306 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
307 		olen += sizeof(h_outer.l4hdr.gre);
308 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
309 		h_outer.l4hdr.gre.flags = 0;
310 		break;
311 	case IPPROTO_UDP:
312 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
313 		olen += sizeof(h_outer.l4hdr.udp);
314 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
315 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
316 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
317 			  sizeof(h_outer.l4hdr.udp) + l2_len;
318 		h_outer.l4hdr.udp.check = 0;
319 		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
320 		break;
321 	case IPPROTO_IPV6:
322 		break;
323 	default:
324 		return TC_ACT_OK;
325 	}
326 
327 	/* add L2 encap (if specified) */
328 	l2_hdr = (__u8 *)&h_outer + olen;
329 	switch (l2_proto) {
330 	case ETH_P_MPLS_UC:
331 		*(__u32 *)l2_hdr = mpls_label;
332 		break;
333 	case ETH_P_TEB:
334 		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
335 
336 		if (ext_proto & EXTPROTO_VXLAN) {
337 			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
338 
339 			vxlan_hdr->vx_flags = VXLAN_FLAGS;
340 			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
341 
342 			l2_hdr += sizeof(struct vxlanhdr);
343 		}
344 
345 		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
346 			return TC_ACT_SHOT;
347 		break;
348 	}
349 	olen += l2_len;
350 
351 	/* add room between mac and network header */
352 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
353 		return TC_ACT_SHOT;
354 
355 	/* prepare new outer network header */
356 	h_outer.ip = iph_inner;
357 	h_outer.ip.payload_len = bpf_htons(olen +
358 					   bpf_ntohs(h_outer.ip.payload_len));
359 
360 	h_outer.ip.nexthdr = encap_proto;
361 
362 	/* store new outer network header */
363 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
364 				BPF_F_INVALIDATE_HASH) < 0)
365 		return TC_ACT_SHOT;
366 
367 	return TC_ACT_OK;
368 }
369 
370 static int encap_ipv6_ipip6(struct __sk_buff *skb)
371 {
372 	struct iphdr iph_inner;
373 	struct v6hdr h_outer;
374 	struct tcphdr tcph;
375 	struct ethhdr eth;
376 	__u64 flags;
377 	int olen;
378 
379 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
380 			       sizeof(iph_inner)) < 0)
381 		return TC_ACT_OK;
382 
383 	/* filter only packets we want */
384 	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
385 			       &tcph, sizeof(tcph)) < 0)
386 		return TC_ACT_OK;
387 
388 	if (tcph.dest != __bpf_constant_htons(cfg_port))
389 		return TC_ACT_OK;
390 
391 	olen = sizeof(h_outer.ip);
392 
393 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
394 
395 	/* add room between mac and network header */
396 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
397 		return TC_ACT_SHOT;
398 
399 	/* prepare new outer network header */
400 	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
401 	h_outer.ip.version = 6;
402 	h_outer.ip.hop_limit = iph_inner.ttl;
403 	h_outer.ip.saddr.s6_addr[1] = 0xfd;
404 	h_outer.ip.saddr.s6_addr[15] = 1;
405 	h_outer.ip.daddr.s6_addr[1] = 0xfd;
406 	h_outer.ip.daddr.s6_addr[15] = 2;
407 	h_outer.ip.payload_len = iph_inner.tot_len;
408 	h_outer.ip.nexthdr = IPPROTO_IPIP;
409 
410 	/* store new outer network header */
411 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
412 				BPF_F_INVALIDATE_HASH) < 0)
413 		return TC_ACT_SHOT;
414 
415 	/* update eth->h_proto */
416 	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
417 		return TC_ACT_SHOT;
418 	eth.h_proto = bpf_htons(ETH_P_IPV6);
419 	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
420 		return TC_ACT_SHOT;
421 
422 	return TC_ACT_OK;
423 }
424 
425 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
426 				      __u16 l2_proto)
427 {
428 	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
429 }
430 
431 SEC("encap_ipip_none")
432 int __encap_ipip_none(struct __sk_buff *skb)
433 {
434 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
435 		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
436 	else
437 		return TC_ACT_OK;
438 }
439 
440 SEC("encap_gre_none")
441 int __encap_gre_none(struct __sk_buff *skb)
442 {
443 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
444 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
445 	else
446 		return TC_ACT_OK;
447 }
448 
449 SEC("encap_gre_mpls")
450 int __encap_gre_mpls(struct __sk_buff *skb)
451 {
452 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
453 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
454 	else
455 		return TC_ACT_OK;
456 }
457 
458 SEC("encap_gre_eth")
459 int __encap_gre_eth(struct __sk_buff *skb)
460 {
461 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
462 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
463 	else
464 		return TC_ACT_OK;
465 }
466 
467 SEC("encap_udp_none")
468 int __encap_udp_none(struct __sk_buff *skb)
469 {
470 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
471 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
472 	else
473 		return TC_ACT_OK;
474 }
475 
476 SEC("encap_udp_mpls")
477 int __encap_udp_mpls(struct __sk_buff *skb)
478 {
479 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
480 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
481 	else
482 		return TC_ACT_OK;
483 }
484 
485 SEC("encap_udp_eth")
486 int __encap_udp_eth(struct __sk_buff *skb)
487 {
488 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
489 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
490 	else
491 		return TC_ACT_OK;
492 }
493 
494 SEC("encap_vxlan_eth")
495 int __encap_vxlan_eth(struct __sk_buff *skb)
496 {
497 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
498 		return __encap_ipv4(skb, IPPROTO_UDP,
499 				    ETH_P_TEB,
500 				    EXTPROTO_VXLAN);
501 	else
502 		return TC_ACT_OK;
503 }
504 
505 SEC("encap_sit_none")
506 int __encap_sit_none(struct __sk_buff *skb)
507 {
508 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
509 		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
510 	else
511 		return TC_ACT_OK;
512 }
513 
514 SEC("encap_ip6tnl_none")
515 int __encap_ip6tnl_none(struct __sk_buff *skb)
516 {
517 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
518 		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
519 	else
520 		return TC_ACT_OK;
521 }
522 
523 SEC("encap_ipip6_none")
524 int __encap_ipip6_none(struct __sk_buff *skb)
525 {
526 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
527 		return encap_ipv6_ipip6(skb);
528 	else
529 		return TC_ACT_OK;
530 }
531 
532 SEC("encap_ip6gre_none")
533 int __encap_ip6gre_none(struct __sk_buff *skb)
534 {
535 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
536 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
537 	else
538 		return TC_ACT_OK;
539 }
540 
541 SEC("encap_ip6gre_mpls")
542 int __encap_ip6gre_mpls(struct __sk_buff *skb)
543 {
544 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
545 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
546 	else
547 		return TC_ACT_OK;
548 }
549 
550 SEC("encap_ip6gre_eth")
551 int __encap_ip6gre_eth(struct __sk_buff *skb)
552 {
553 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
554 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
555 	else
556 		return TC_ACT_OK;
557 }
558 
559 SEC("encap_ip6udp_none")
560 int __encap_ip6udp_none(struct __sk_buff *skb)
561 {
562 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
563 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
564 	else
565 		return TC_ACT_OK;
566 }
567 
568 SEC("encap_ip6udp_mpls")
569 int __encap_ip6udp_mpls(struct __sk_buff *skb)
570 {
571 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
572 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
573 	else
574 		return TC_ACT_OK;
575 }
576 
577 SEC("encap_ip6udp_eth")
578 int __encap_ip6udp_eth(struct __sk_buff *skb)
579 {
580 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
581 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
582 	else
583 		return TC_ACT_OK;
584 }
585 
586 SEC("encap_ip6vxlan_eth")
587 int __encap_ip6vxlan_eth(struct __sk_buff *skb)
588 {
589 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
590 		return __encap_ipv6(skb, IPPROTO_UDP,
591 				    ETH_P_TEB,
592 				    EXTPROTO_VXLAN);
593 	else
594 		return TC_ACT_OK;
595 }
596 
597 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
598 {
599 	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
600 	struct ipv6_opt_hdr ip6_opt_hdr;
601 	struct gre_hdr greh;
602 	struct udphdr udph;
603 	int olen = len;
604 
605 	switch (proto) {
606 	case IPPROTO_IPIP:
607 		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
608 		break;
609 	case IPPROTO_IPV6:
610 		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
611 		break;
612 	case NEXTHDR_DEST:
613 		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
614 				       sizeof(ip6_opt_hdr)) < 0)
615 			return TC_ACT_OK;
616 		switch (ip6_opt_hdr.nexthdr) {
617 		case IPPROTO_IPIP:
618 			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
619 			break;
620 		case IPPROTO_IPV6:
621 			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
622 			break;
623 		default:
624 			return TC_ACT_OK;
625 		}
626 		break;
627 	case IPPROTO_GRE:
628 		olen += sizeof(struct gre_hdr);
629 		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
630 			return TC_ACT_OK;
631 		switch (bpf_ntohs(greh.protocol)) {
632 		case ETH_P_MPLS_UC:
633 			olen += sizeof(mpls_label);
634 			break;
635 		case ETH_P_TEB:
636 			olen += ETH_HLEN;
637 			break;
638 		}
639 		break;
640 	case IPPROTO_UDP:
641 		olen += sizeof(struct udphdr);
642 		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
643 			return TC_ACT_OK;
644 		switch (bpf_ntohs(udph.dest)) {
645 		case MPLS_OVER_UDP_PORT:
646 			olen += sizeof(mpls_label);
647 			break;
648 		case ETH_OVER_UDP_PORT:
649 			olen += ETH_HLEN;
650 			break;
651 		case VXLAN_UDP_PORT:
652 			olen += ETH_HLEN + sizeof(struct vxlanhdr);
653 			break;
654 		}
655 		break;
656 	default:
657 		return TC_ACT_OK;
658 	}
659 
660 	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
661 		return TC_ACT_SHOT;
662 
663 	return TC_ACT_OK;
664 }
665 
666 static int decap_ipv4(struct __sk_buff *skb)
667 {
668 	struct iphdr iph_outer;
669 
670 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
671 			       sizeof(iph_outer)) < 0)
672 		return TC_ACT_OK;
673 
674 	if (iph_outer.ihl != 5)
675 		return TC_ACT_OK;
676 
677 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
678 			      iph_outer.protocol);
679 }
680 
681 static int decap_ipv6(struct __sk_buff *skb)
682 {
683 	struct ipv6hdr iph_outer;
684 
685 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
686 			       sizeof(iph_outer)) < 0)
687 		return TC_ACT_OK;
688 
689 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
690 			      iph_outer.nexthdr);
691 }
692 
693 SEC("decap")
694 int decap_f(struct __sk_buff *skb)
695 {
696 	switch (skb->protocol) {
697 	case __bpf_constant_htons(ETH_P_IP):
698 		return decap_ipv4(skb);
699 	case __bpf_constant_htons(ETH_P_IPV6):
700 		return decap_ipv6(skb);
701 	default:
702 		/* does not match, ignore */
703 		return TC_ACT_OK;
704 	}
705 }
706 
/* License string placed in the "license" section of the object file. */
707 char __license[] SEC("license") = "GPL";
708