1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* In-place tunneling */
4 
5 #include <stdbool.h>
6 #include <string.h>
7 
8 #include <linux/stddef.h>
9 #include <linux/bpf.h>
10 #include <linux/if_ether.h>
11 #include <linux/in.h>
12 #include <linux/ip.h>
13 #include <linux/ipv6.h>
14 #include <linux/mpls.h>
15 #include <linux/tcp.h>
16 #include <linux/udp.h>
17 #include <linux/pkt_cls.h>
18 #include <linux/types.h>
19 
20 #include "bpf_endian.h"
21 #include "bpf_helpers.h"
22 
/* TCP destination port (host order) that selects which flows get encapsulated. */
static const int cfg_port = 8000;

/* Source port (host order) written into the outer UDP header by UDP encap. */
static const int cfg_udp_src = 20000;

/*
 * Outer UDP destination ports. UDP_PORT is the default; the MPLS and ETH
 * variants are chosen by the encap paths when an MPLS label or an Ethernet
 * header follows the UDP header, and are matched by the decap path to work
 * out how many extra bytes to strip.
 */
#define	UDP_PORT		5555
#define	MPLS_OVER_UDP_PORT	6635
#define	ETH_OVER_UDP_PORT	7777
30 
/*
 * MPLS label 1000 with S bit (last label) set and ttl of 255.
 * Stored already converted to network byte order so it can be copied
 * verbatim into the outer header.
 */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);
34 
/*
 * Minimal 4-byte GRE header as emitted by the encap paths: they always write
 * flags as 0 and put the ethertype of the encapsulated payload in protocol.
 */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));
39 
/* Outer transport-level header: either a UDP header or a GRE header. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};
44 
/*
 * Scratch buffer for the outer headers of an IPv4 tunnel: IPv4 header,
 * optional UDP/GRE header, then room for an optional L2 header.
 * The encap code writes the L2 header at the running byte offset right after
 * the headers actually in use, not necessarily at &pad[0]; e.g. with GRE the
 * L2 bytes start inside the unused tail of the l4hdr union.
 */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[16];			/* enough space for L2 header */
} __attribute__((packed));
50 
/*
 * Scratch buffer for the outer headers of an IPv6 tunnel: IPv6 header,
 * optional UDP/GRE header, then room for an optional L2 header.
 * The encap code writes the L2 header at the running byte offset right after
 * the headers actually in use, not necessarily at &pad[0].
 */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[16];			/* enough space for L2 header */
} __attribute__((packed));
56 
/*
 * Compute and store the IPv4 header checksum.
 *
 * @iph: IPv4 header without options (exactly sizeof(struct iphdr) bytes are
 *       summed). The check field is overwritten.
 *
 * The checksum is the one's complement of the one's-complement sum of the
 * header's 16-bit words, computed with the check field set to zero.
 */
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum = 0;
	unsigned int i;

	iph->check = 0;

#pragma clang loop unroll(full)
	for (i = 0; i < sizeof(*iph) >> 1; i++)
		csum += *iph16++;

	/*
	 * Fold the carries back into the low 16 bits. The first fold can
	 * itself carry (e.g. 0x1ffff -> 0x10000), so fold a second time;
	 * after that the value is guaranteed to fit in 16 bits.
	 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);

	iph->check = (__u16)~csum;
}
71 
/*
 * Encapsulate a matching IPv4/TCP packet in place behind a new outer IPv4
 * header, optionally followed by a GRE or UDP header and an MPLS label or a
 * copy of the packet's Ethernet header.
 *
 * @skb:         packet being processed
 * @encap_proto: outer IP protocol: IPPROTO_IPIP, IPPROTO_GRE or IPPROTO_UDP
 * @l2_proto:    ETH_P_MPLS_UC or ETH_P_TEB to insert an L2 header after the
 *               outer L4 header; any other value inserts none. The value is
 *               also written as the GRE protocol field.
 *
 * Returns TC_ACT_OK when the packet is left untouched (header load failed,
 * IPv4 options present, not TCP, wrong destination port, unsupported
 * encap_proto) or was encapsulated successfully, and TC_ACT_SHOT when a
 * helper fails once encapsulation has started.
 */
static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct iphdr iph_inner;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want: option-less IPv4 carrying TCP */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	/* olen accumulates the number of outer header bytes to insert */
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	/* size of the optional L2 header and matching UDP destination port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* fill in the outer L4 header, if the encap type has one */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;
		/* UDP length = UDP header + L2 header + whole inner packet */
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
		break;
	default:
		return TC_ACT_OK;
	}

	/*
	 * add L2 encap (if specified), written directly after the headers
	 * built so far; the Ethernet header is read from the packet before
	 * room is made below
	 */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
		break;
	case ETH_P_TEB:
		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
				       ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/*
	 * prepare new outer network header: start from a copy of the inner
	 * header, then fix up total length, protocol and checksum
	 */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum(&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
169 
170 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
171 				      __u16 l2_proto)
172 {
173 	__u16 udp_dst = UDP_PORT;
174 	struct ipv6hdr iph_inner;
175 	struct v6hdr h_outer;
176 	struct tcphdr tcph;
177 	int olen, l2_len;
178 	__u16 tot_len;
179 	__u64 flags;
180 
181 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
182 			       sizeof(iph_inner)) < 0)
183 		return TC_ACT_OK;
184 
185 	/* filter only packets we want */
186 	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
187 			       &tcph, sizeof(tcph)) < 0)
188 		return TC_ACT_OK;
189 
190 	if (tcph.dest != __bpf_constant_htons(cfg_port))
191 		return TC_ACT_OK;
192 
193 	olen = sizeof(h_outer.ip);
194 	l2_len = 0;
195 
196 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
197 
198 	switch (l2_proto) {
199 	case ETH_P_MPLS_UC:
200 		l2_len = sizeof(mpls_label);
201 		udp_dst = MPLS_OVER_UDP_PORT;
202 		break;
203 	case ETH_P_TEB:
204 		l2_len = ETH_HLEN;
205 		udp_dst = ETH_OVER_UDP_PORT;
206 		break;
207 	}
208 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
209 
210 	switch (encap_proto) {
211 	case IPPROTO_GRE:
212 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
213 		olen += sizeof(h_outer.l4hdr.gre);
214 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
215 		h_outer.l4hdr.gre.flags = 0;
216 		break;
217 	case IPPROTO_UDP:
218 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
219 		olen += sizeof(h_outer.l4hdr.udp);
220 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
221 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
222 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
223 			  sizeof(h_outer.l4hdr.udp);
224 		h_outer.l4hdr.udp.check = 0;
225 		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
226 		break;
227 	case IPPROTO_IPV6:
228 		break;
229 	default:
230 		return TC_ACT_OK;
231 	}
232 
233 	/* add L2 encap (if specified) */
234 	switch (l2_proto) {
235 	case ETH_P_MPLS_UC:
236 		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
237 		break;
238 	case ETH_P_TEB:
239 		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
240 				       ETH_HLEN))
241 			return TC_ACT_SHOT;
242 		break;
243 	}
244 	olen += l2_len;
245 
246 	/* add room between mac and network header */
247 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
248 		return TC_ACT_SHOT;
249 
250 	/* prepare new outer network header */
251 	h_outer.ip = iph_inner;
252 	h_outer.ip.payload_len = bpf_htons(olen +
253 					   bpf_ntohs(h_outer.ip.payload_len));
254 
255 	h_outer.ip.nexthdr = encap_proto;
256 
257 	/* store new outer network header */
258 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
259 				BPF_F_INVALIDATE_HASH) < 0)
260 		return TC_ACT_SHOT;
261 
262 	return TC_ACT_OK;
263 }
264 
265 SEC("encap_ipip_none")
266 int __encap_ipip_none(struct __sk_buff *skb)
267 {
268 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
269 		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
270 	else
271 		return TC_ACT_OK;
272 }
273 
274 SEC("encap_gre_none")
275 int __encap_gre_none(struct __sk_buff *skb)
276 {
277 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
278 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
279 	else
280 		return TC_ACT_OK;
281 }
282 
283 SEC("encap_gre_mpls")
284 int __encap_gre_mpls(struct __sk_buff *skb)
285 {
286 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
287 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
288 	else
289 		return TC_ACT_OK;
290 }
291 
292 SEC("encap_gre_eth")
293 int __encap_gre_eth(struct __sk_buff *skb)
294 {
295 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
296 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
297 	else
298 		return TC_ACT_OK;
299 }
300 
301 SEC("encap_udp_none")
302 int __encap_udp_none(struct __sk_buff *skb)
303 {
304 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
305 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
306 	else
307 		return TC_ACT_OK;
308 }
309 
310 SEC("encap_udp_mpls")
311 int __encap_udp_mpls(struct __sk_buff *skb)
312 {
313 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
314 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
315 	else
316 		return TC_ACT_OK;
317 }
318 
319 SEC("encap_udp_eth")
320 int __encap_udp_eth(struct __sk_buff *skb)
321 {
322 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
323 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
324 	else
325 		return TC_ACT_OK;
326 }
327 
328 SEC("encap_ip6tnl_none")
329 int __encap_ip6tnl_none(struct __sk_buff *skb)
330 {
331 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
332 		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
333 	else
334 		return TC_ACT_OK;
335 }
336 
337 SEC("encap_ip6gre_none")
338 int __encap_ip6gre_none(struct __sk_buff *skb)
339 {
340 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
341 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
342 	else
343 		return TC_ACT_OK;
344 }
345 
346 SEC("encap_ip6gre_mpls")
347 int __encap_ip6gre_mpls(struct __sk_buff *skb)
348 {
349 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
350 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
351 	else
352 		return TC_ACT_OK;
353 }
354 
355 SEC("encap_ip6gre_eth")
356 int __encap_ip6gre_eth(struct __sk_buff *skb)
357 {
358 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
359 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
360 	else
361 		return TC_ACT_OK;
362 }
363 
364 SEC("encap_ip6udp_none")
365 int __encap_ip6udp_none(struct __sk_buff *skb)
366 {
367 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
368 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
369 	else
370 		return TC_ACT_OK;
371 }
372 
373 SEC("encap_ip6udp_mpls")
374 int __encap_ip6udp_mpls(struct __sk_buff *skb)
375 {
376 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
377 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
378 	else
379 		return TC_ACT_OK;
380 }
381 
382 SEC("encap_ip6udp_eth")
383 int __encap_ip6udp_eth(struct __sk_buff *skb)
384 {
385 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
386 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
387 	else
388 		return TC_ACT_OK;
389 }
390 
391 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
392 {
393 	char buf[sizeof(struct v6hdr)];
394 	struct gre_hdr greh;
395 	struct udphdr udph;
396 	int olen = len;
397 
398 	switch (proto) {
399 	case IPPROTO_IPIP:
400 	case IPPROTO_IPV6:
401 		break;
402 	case IPPROTO_GRE:
403 		olen += sizeof(struct gre_hdr);
404 		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
405 			return TC_ACT_OK;
406 		switch (bpf_ntohs(greh.protocol)) {
407 		case ETH_P_MPLS_UC:
408 			olen += sizeof(mpls_label);
409 			break;
410 		case ETH_P_TEB:
411 			olen += ETH_HLEN;
412 			break;
413 		}
414 		break;
415 	case IPPROTO_UDP:
416 		olen += sizeof(struct udphdr);
417 		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
418 			return TC_ACT_OK;
419 		switch (bpf_ntohs(udph.dest)) {
420 		case MPLS_OVER_UDP_PORT:
421 			olen += sizeof(mpls_label);
422 			break;
423 		case ETH_OVER_UDP_PORT:
424 			olen += ETH_HLEN;
425 			break;
426 		}
427 		break;
428 	default:
429 		return TC_ACT_OK;
430 	}
431 
432 	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
433 				BPF_F_ADJ_ROOM_FIXED_GSO))
434 		return TC_ACT_SHOT;
435 
436 	return TC_ACT_OK;
437 }
438 
/*
 * Decapsulate a packet whose outer header is IPv4.
 * Packets whose outer header cannot be read or carries IP options are
 * passed through unchanged.
 */
static int decap_ipv4(struct __sk_buff *skb)
{
	struct iphdr outer;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &outer, sizeof(outer)) < 0 ||
	    outer.ihl != 5)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(outer), outer.protocol);
}
453 
/*
 * Decapsulate a packet whose outer header is IPv6.
 * Packets whose outer header cannot be read are passed through unchanged.
 */
static int decap_ipv6(struct __sk_buff *skb)
{
	struct ipv6hdr outer;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &outer, sizeof(outer)) < 0)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(outer), outer.nexthdr);
}
465 
466 SEC("decap")
467 int decap_f(struct __sk_buff *skb)
468 {
469 	switch (skb->protocol) {
470 	case __bpf_constant_htons(ETH_P_IP):
471 		return decap_ipv4(skb);
472 	case __bpf_constant_htons(ETH_P_IPV6):
473 		return decap_ipv6(skb);
474 	default:
475 		/* does not match, ignore */
476 		return TC_ACT_OK;
477 	}
478 }
479 
/* License string placed in the object's "license" section. */
char __license[] SEC("license") = "GPL";
481