1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* In-place tunneling */
4 
5 #include <stdbool.h>
6 #include <string.h>
7 
8 #include <linux/stddef.h>
9 #include <linux/bpf.h>
10 #include <linux/if_ether.h>
11 #include <linux/in.h>
12 #include <linux/ip.h>
13 #include <linux/ipv6.h>
14 #include <linux/mpls.h>
15 #include <linux/tcp.h>
16 #include <linux/udp.h>
17 #include <linux/pkt_cls.h>
18 #include <linux/types.h>
19 
20 #include "bpf_endian.h"
21 #include "bpf_helpers.h"
22 
23 static const int cfg_port = 8000;
24 
25 static const int cfg_udp_src = 20000;
26 
27 #define	UDP_PORT		5555
28 #define	MPLS_OVER_UDP_PORT	6635
29 #define	ETH_OVER_UDP_PORT	7777
30 
31 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
32 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
33 						     MPLS_LS_S_MASK | 0xff);
34 
/* Four-byte GRE header as written by the encap paths (flags, then protocol). */
struct __attribute__((packed)) gre_hdr {
	__be16 flags;		/* always stored as 0 by the encap code */
	__be16 protocol;	/* ethertype of the encapsulated payload */
};
39 
40 union l4hdr {
41 	struct udphdr udp;
42 	struct gre_hdr gre;
43 };
44 
45 struct v4hdr {
46 	struct iphdr ip;
47 	union l4hdr l4hdr;
48 	__u8 pad[16];			/* enough space for L2 header */
49 } __attribute__((packed));
50 
51 struct v6hdr {
52 	struct ipv6hdr ip;
53 	union l4hdr l4hdr;
54 	__u8 pad[16];			/* enough space for L2 header */
55 } __attribute__((packed));
56 
/*
 * Compute the checksum over the fixed-size IPv4 header (no options) pointed
 * to by @iph and store it in iph->check.
 *
 * The check field is zeroed first so it does not contribute to the sum. The
 * 16-bit words are accumulated in a 32-bit counter and then folded back to
 * 16 bits before being complemented.
 */
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum = 0;
	int i;

	iph->check = 0;

#pragma clang loop unroll(full)
	for (i = 0; i < (int)(sizeof(*iph) >> 1); i++)
		csum += *iph16++;

	/*
	 * Fold twice: adding the carries back in can itself overflow 16 bits
	 * (e.g. 0x7ffff -> 0x10006), and that second carry must not be lost
	 * when the result is truncated to the 16-bit check field.
	 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);

	iph->check = (__u16)~csum;
}
71 
72 static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
73 				      __u16 l2_proto)
74 {
75 	__u16 udp_dst = UDP_PORT;
76 	struct iphdr iph_inner;
77 	struct v4hdr h_outer;
78 	struct tcphdr tcph;
79 	int olen, l2_len;
80 	int tcp_off;
81 	__u64 flags;
82 
83 	/* Most tests encapsulate a packet into a tunnel with the same
84 	 * network protocol, and derive the outer header fields from
85 	 * the inner header.
86 	 *
87 	 * The 6in4 case tests different inner and outer protocols. As
88 	 * the inner is ipv6, but the outer expects an ipv4 header as
89 	 * input, manually build a struct iphdr based on the ipv6hdr.
90 	 */
91 	if (encap_proto == IPPROTO_IPV6) {
92 		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
93 		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
94 		struct ipv6hdr iph6_inner;
95 
96 		/* Read the IPv6 header */
97 		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
98 				       sizeof(iph6_inner)) < 0)
99 			return TC_ACT_OK;
100 
101 		/* Derive the IPv4 header fields from the IPv6 header */
102 		memset(&iph_inner, 0, sizeof(iph_inner));
103 		iph_inner.version = 4;
104 		iph_inner.ihl = 5;
105 		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
106 				    bpf_ntohs(iph6_inner.payload_len));
107 		iph_inner.ttl = iph6_inner.hop_limit - 1;
108 		iph_inner.protocol = iph6_inner.nexthdr;
109 		iph_inner.saddr = __bpf_constant_htonl(saddr);
110 		iph_inner.daddr = __bpf_constant_htonl(daddr);
111 
112 		tcp_off = sizeof(iph6_inner);
113 	} else {
114 		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
115 				       sizeof(iph_inner)) < 0)
116 			return TC_ACT_OK;
117 
118 		tcp_off = sizeof(iph_inner);
119 	}
120 
121 	/* filter only packets we want */
122 	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
123 		return TC_ACT_OK;
124 
125 	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
126 			       &tcph, sizeof(tcph)) < 0)
127 		return TC_ACT_OK;
128 
129 	if (tcph.dest != __bpf_constant_htons(cfg_port))
130 		return TC_ACT_OK;
131 
132 	olen = sizeof(h_outer.ip);
133 	l2_len = 0;
134 
135 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
136 
137 	switch (l2_proto) {
138 	case ETH_P_MPLS_UC:
139 		l2_len = sizeof(mpls_label);
140 		udp_dst = MPLS_OVER_UDP_PORT;
141 		break;
142 	case ETH_P_TEB:
143 		l2_len = ETH_HLEN;
144 		udp_dst = ETH_OVER_UDP_PORT;
145 		break;
146 	}
147 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
148 
149 	switch (encap_proto) {
150 	case IPPROTO_GRE:
151 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
152 		olen += sizeof(h_outer.l4hdr.gre);
153 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
154 		h_outer.l4hdr.gre.flags = 0;
155 		break;
156 	case IPPROTO_UDP:
157 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
158 		olen += sizeof(h_outer.l4hdr.udp);
159 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
160 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
161 		h_outer.l4hdr.udp.check = 0;
162 		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
163 						  sizeof(h_outer.l4hdr.udp) +
164 						  l2_len);
165 		break;
166 	case IPPROTO_IPIP:
167 	case IPPROTO_IPV6:
168 		break;
169 	default:
170 		return TC_ACT_OK;
171 	}
172 
173 	/* add L2 encap (if specified) */
174 	switch (l2_proto) {
175 	case ETH_P_MPLS_UC:
176 		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
177 		break;
178 	case ETH_P_TEB:
179 		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
180 				       ETH_HLEN))
181 			return TC_ACT_SHOT;
182 		break;
183 	}
184 	olen += l2_len;
185 
186 	/* add room between mac and network header */
187 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
188 		return TC_ACT_SHOT;
189 
190 	/* prepare new outer network header */
191 	h_outer.ip = iph_inner;
192 	h_outer.ip.tot_len = bpf_htons(olen +
193 				       bpf_ntohs(h_outer.ip.tot_len));
194 	h_outer.ip.protocol = encap_proto;
195 
196 	set_ipv4_csum((void *)&h_outer.ip);
197 
198 	/* store new outer network header */
199 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
200 				BPF_F_INVALIDATE_HASH) < 0)
201 		return TC_ACT_SHOT;
202 
203 	/* if changing outer proto type, update eth->h_proto */
204 	if (encap_proto == IPPROTO_IPV6) {
205 		struct ethhdr eth;
206 
207 		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
208 			return TC_ACT_SHOT;
209 		eth.h_proto = bpf_htons(ETH_P_IP);
210 		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
211 			return TC_ACT_SHOT;
212 	}
213 
214 	return TC_ACT_OK;
215 }
216 
217 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
218 				      __u16 l2_proto)
219 {
220 	__u16 udp_dst = UDP_PORT;
221 	struct ipv6hdr iph_inner;
222 	struct v6hdr h_outer;
223 	struct tcphdr tcph;
224 	int olen, l2_len;
225 	__u16 tot_len;
226 	__u64 flags;
227 
228 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
229 			       sizeof(iph_inner)) < 0)
230 		return TC_ACT_OK;
231 
232 	/* filter only packets we want */
233 	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
234 			       &tcph, sizeof(tcph)) < 0)
235 		return TC_ACT_OK;
236 
237 	if (tcph.dest != __bpf_constant_htons(cfg_port))
238 		return TC_ACT_OK;
239 
240 	olen = sizeof(h_outer.ip);
241 	l2_len = 0;
242 
243 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
244 
245 	switch (l2_proto) {
246 	case ETH_P_MPLS_UC:
247 		l2_len = sizeof(mpls_label);
248 		udp_dst = MPLS_OVER_UDP_PORT;
249 		break;
250 	case ETH_P_TEB:
251 		l2_len = ETH_HLEN;
252 		udp_dst = ETH_OVER_UDP_PORT;
253 		break;
254 	}
255 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
256 
257 	switch (encap_proto) {
258 	case IPPROTO_GRE:
259 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
260 		olen += sizeof(h_outer.l4hdr.gre);
261 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
262 		h_outer.l4hdr.gre.flags = 0;
263 		break;
264 	case IPPROTO_UDP:
265 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
266 		olen += sizeof(h_outer.l4hdr.udp);
267 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
268 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
269 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
270 			  sizeof(h_outer.l4hdr.udp);
271 		h_outer.l4hdr.udp.check = 0;
272 		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
273 		break;
274 	case IPPROTO_IPV6:
275 		break;
276 	default:
277 		return TC_ACT_OK;
278 	}
279 
280 	/* add L2 encap (if specified) */
281 	switch (l2_proto) {
282 	case ETH_P_MPLS_UC:
283 		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
284 		break;
285 	case ETH_P_TEB:
286 		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
287 				       ETH_HLEN))
288 			return TC_ACT_SHOT;
289 		break;
290 	}
291 	olen += l2_len;
292 
293 	/* add room between mac and network header */
294 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
295 		return TC_ACT_SHOT;
296 
297 	/* prepare new outer network header */
298 	h_outer.ip = iph_inner;
299 	h_outer.ip.payload_len = bpf_htons(olen +
300 					   bpf_ntohs(h_outer.ip.payload_len));
301 
302 	h_outer.ip.nexthdr = encap_proto;
303 
304 	/* store new outer network header */
305 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
306 				BPF_F_INVALIDATE_HASH) < 0)
307 		return TC_ACT_SHOT;
308 
309 	return TC_ACT_OK;
310 }
311 
312 SEC("encap_ipip_none")
313 int __encap_ipip_none(struct __sk_buff *skb)
314 {
315 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
316 		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
317 	else
318 		return TC_ACT_OK;
319 }
320 
321 SEC("encap_gre_none")
322 int __encap_gre_none(struct __sk_buff *skb)
323 {
324 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
325 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
326 	else
327 		return TC_ACT_OK;
328 }
329 
330 SEC("encap_gre_mpls")
331 int __encap_gre_mpls(struct __sk_buff *skb)
332 {
333 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
334 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
335 	else
336 		return TC_ACT_OK;
337 }
338 
339 SEC("encap_gre_eth")
340 int __encap_gre_eth(struct __sk_buff *skb)
341 {
342 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
343 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
344 	else
345 		return TC_ACT_OK;
346 }
347 
348 SEC("encap_udp_none")
349 int __encap_udp_none(struct __sk_buff *skb)
350 {
351 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
352 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
353 	else
354 		return TC_ACT_OK;
355 }
356 
357 SEC("encap_udp_mpls")
358 int __encap_udp_mpls(struct __sk_buff *skb)
359 {
360 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
361 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
362 	else
363 		return TC_ACT_OK;
364 }
365 
366 SEC("encap_udp_eth")
367 int __encap_udp_eth(struct __sk_buff *skb)
368 {
369 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
370 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
371 	else
372 		return TC_ACT_OK;
373 }
374 
375 SEC("encap_sit_none")
376 int __encap_sit_none(struct __sk_buff *skb)
377 {
378 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
379 		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
380 	else
381 		return TC_ACT_OK;
382 }
383 
384 SEC("encap_ip6tnl_none")
385 int __encap_ip6tnl_none(struct __sk_buff *skb)
386 {
387 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
388 		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
389 	else
390 		return TC_ACT_OK;
391 }
392 
393 SEC("encap_ip6gre_none")
394 int __encap_ip6gre_none(struct __sk_buff *skb)
395 {
396 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
397 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
398 	else
399 		return TC_ACT_OK;
400 }
401 
402 SEC("encap_ip6gre_mpls")
403 int __encap_ip6gre_mpls(struct __sk_buff *skb)
404 {
405 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
406 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
407 	else
408 		return TC_ACT_OK;
409 }
410 
411 SEC("encap_ip6gre_eth")
412 int __encap_ip6gre_eth(struct __sk_buff *skb)
413 {
414 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
415 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
416 	else
417 		return TC_ACT_OK;
418 }
419 
420 SEC("encap_ip6udp_none")
421 int __encap_ip6udp_none(struct __sk_buff *skb)
422 {
423 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
424 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
425 	else
426 		return TC_ACT_OK;
427 }
428 
429 SEC("encap_ip6udp_mpls")
430 int __encap_ip6udp_mpls(struct __sk_buff *skb)
431 {
432 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
433 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
434 	else
435 		return TC_ACT_OK;
436 }
437 
438 SEC("encap_ip6udp_eth")
439 int __encap_ip6udp_eth(struct __sk_buff *skb)
440 {
441 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
442 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
443 	else
444 		return TC_ACT_OK;
445 }
446 
447 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
448 {
449 	char buf[sizeof(struct v6hdr)];
450 	struct gre_hdr greh;
451 	struct udphdr udph;
452 	int olen = len;
453 
454 	switch (proto) {
455 	case IPPROTO_IPIP:
456 	case IPPROTO_IPV6:
457 		break;
458 	case IPPROTO_GRE:
459 		olen += sizeof(struct gre_hdr);
460 		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
461 			return TC_ACT_OK;
462 		switch (bpf_ntohs(greh.protocol)) {
463 		case ETH_P_MPLS_UC:
464 			olen += sizeof(mpls_label);
465 			break;
466 		case ETH_P_TEB:
467 			olen += ETH_HLEN;
468 			break;
469 		}
470 		break;
471 	case IPPROTO_UDP:
472 		olen += sizeof(struct udphdr);
473 		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
474 			return TC_ACT_OK;
475 		switch (bpf_ntohs(udph.dest)) {
476 		case MPLS_OVER_UDP_PORT:
477 			olen += sizeof(mpls_label);
478 			break;
479 		case ETH_OVER_UDP_PORT:
480 			olen += ETH_HLEN;
481 			break;
482 		}
483 		break;
484 	default:
485 		return TC_ACT_OK;
486 	}
487 
488 	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
489 				BPF_F_ADJ_ROOM_FIXED_GSO))
490 		return TC_ACT_SHOT;
491 
492 	return TC_ACT_OK;
493 }
494 
495 static int decap_ipv4(struct __sk_buff *skb)
496 {
497 	struct iphdr iph_outer;
498 
499 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
500 			       sizeof(iph_outer)) < 0)
501 		return TC_ACT_OK;
502 
503 	if (iph_outer.ihl != 5)
504 		return TC_ACT_OK;
505 
506 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
507 			      iph_outer.protocol);
508 }
509 
510 static int decap_ipv6(struct __sk_buff *skb)
511 {
512 	struct ipv6hdr iph_outer;
513 
514 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
515 			       sizeof(iph_outer)) < 0)
516 		return TC_ACT_OK;
517 
518 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
519 			      iph_outer.nexthdr);
520 }
521 
522 SEC("decap")
523 int decap_f(struct __sk_buff *skb)
524 {
525 	switch (skb->protocol) {
526 	case __bpf_constant_htons(ETH_P_IP):
527 		return decap_ipv4(skb);
528 	case __bpf_constant_htons(ETH_P_IPV6):
529 		return decap_ipv6(skb);
530 	default:
531 		/* does not match, ignore */
532 		return TC_ACT_OK;
533 	}
534 }
535 
/* License string, placed in the "license" section of the object file. */
char __license[] SEC("license") = "GPL";
537