1 // SPDX-License-Identifier: GPL-2.0
2 #include <limits.h>
3 #include <stddef.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <linux/pkt_cls.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/if_ether.h>
10 #include <linux/icmp.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/tcp.h>
14 #include <linux/udp.h>
15 #include <linux/if_packet.h>
16 #include <sys/socket.h>
17 #include <linux/if_tunnel.h>
18 #include <linux/mpls.h>
19 #include <bpf/bpf_helpers.h>
20 #include <bpf/bpf_endian.h>
21 
/* Version marker emitted into the "version" ELF section. */
int _version SEC("version") = 1;
/* PROG(F) opens the definition of a function named bpf_func_<F> and places
 * it in an ELF section whose name is the stringified F.
 */
#define PROG(F) SEC(#F) int bpf_func_##F
24 
/* These are the identifiers of the BPF programs that will be used in tail
 * calls. A program name is limited to 16 characters; once the terminating
 * character and the bpf_func_ prefix added by PROG() above are accounted
 * for, only 6 characters remain, and anything beyond that is cropped.
 *
 * Each enumerator is used both as the argument of PROG() and as the index
 * passed to bpf_tail_call() on jmp_table.
 */
enum {
	IP,
	IPV6,
	IPV6OP,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
	IPV6FR,	/* Fragmentation IPv6 Extension Header */
	MPLS,
	VLAN,
};
37 
/* Host-byte-order masks for the fragmentation field of the IPv4 header
 * (iphdr.frag_off) and of the IPv6 fragment header (frag_hdr.frag_off).
 * Users in this file convert them with bpf_htons() before testing the
 * on-wire value. The *_OFFSET masks select the fragment-offset bits; a
 * non-zero result is treated as "not the first fragment". The *_MF names
 * presumably stand for the "more fragments" flag; IP6_MF is not referenced
 * in the code visible here.
 */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8
42 
/* One VLAN tag as read from the packet by the VLAN program.
 * Only h_vlan_encapsulated_proto is consulted in this file: it is the
 * EtherType used to continue dissection after the tag.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};
47 
/* Fixed leading part of a GRE header as read by parse_ip_proto().
 * flags is tested with GRE_VERSION and the GRE_IS_CSUM/KEY/SEQ macros to
 * work out how many optional 4-byte fields follow; proto is the EtherType
 * of the encapsulated payload.
 */
struct gre_hdr {
	__be16 flags;
	__be16 proto;
};
52 
/* IPv6 fragment extension header as read by the IPV6FR program.
 * nexthdr selects the next parser and frag_off is masked with IP6_OFFSET
 * to tell the first fragment from later ones; the remaining fields are
 * not read in this file.
 */
struct frag_hdr {
	__u8 nexthdr;
	__u8 reserved;
	__be16 frag_off;
	__be32 identification;
};
59 
/* Program array used as the target of every bpf_tail_call() in this file.
 * It is indexed by the IP/IPV6/IPV6OP/IPV6FR/MPLS/VLAN enumerators; nothing
 * in this file populates the slots, so that is presumably done by the
 * loader.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 8);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
66 
/* Hash map that receives a copy of the flow keys each time
 * export_flow_keys() runs. The __u32 key is built from the source and
 * destination ports (sport << 16 | dport).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");
73 
74 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
75 					    int ret)
76 {
77 	__u32 key = (__u32)(keys->sport) << 16 | keys->dport;
78 	struct bpf_flow_keys val;
79 
80 	memcpy(&val, keys, sizeof(val));
81 	bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
82 	return ret;
83 }
84 
85 #define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
86 static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
87 {
88 	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
89 }
90 
91 static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
92 							 __u16 hdr_size,
93 							 void *buffer)
94 {
95 	void *data_end = (void *)(long)skb->data_end;
96 	void *data = (void *)(long)skb->data;
97 	__u16 thoff = skb->flow_keys->thoff;
98 	__u8 *hdr;
99 
100 	/* Verifies this variable offset does not overflow */
101 	if (thoff > (USHRT_MAX - hdr_size))
102 		return NULL;
103 
104 	hdr = data + thoff;
105 	if (hdr + hdr_size <= data_end)
106 		return hdr;
107 
108 	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
109 		return NULL;
110 
111 	return buffer;
112 }
113 
114 /* Dispatches on ETHERTYPE */
115 static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
116 {
117 	struct bpf_flow_keys *keys = skb->flow_keys;
118 
119 	switch (proto) {
120 	case bpf_htons(ETH_P_IP):
121 		bpf_tail_call(skb, &jmp_table, IP);
122 		break;
123 	case bpf_htons(ETH_P_IPV6):
124 		bpf_tail_call(skb, &jmp_table, IPV6);
125 		break;
126 	case bpf_htons(ETH_P_MPLS_MC):
127 	case bpf_htons(ETH_P_MPLS_UC):
128 		bpf_tail_call(skb, &jmp_table, MPLS);
129 		break;
130 	case bpf_htons(ETH_P_8021Q):
131 	case bpf_htons(ETH_P_8021AD):
132 		bpf_tail_call(skb, &jmp_table, VLAN);
133 		break;
134 	default:
135 		/* Protocol not supported */
136 		return export_flow_keys(keys, BPF_DROP);
137 	}
138 
139 	return export_flow_keys(keys, BPF_DROP);
140 }
141 
142 SEC("flow_dissector")
143 int _dissect(struct __sk_buff *skb)
144 {
145 	struct bpf_flow_keys *keys = skb->flow_keys;
146 
147 	return parse_eth_proto(skb, keys->n_proto);
148 }
149 
/* Parses on IPPROTO_*
 *
 * Handles the header found at keys->thoff according to @proto and returns
 * the verdict after exporting the keys:
 *  - ICMP: only checks that the header is readable.
 *  - IPIP / IPV6: marks the flow as encapsulated, then either stops
 *    (BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) or restarts dissection on the
 *    inner IPv4/IPv6 header.
 *  - GRE: steps over the GRE header and its optional fields, then continues
 *    with the encapsulated protocol (through an inner Ethernet header for
 *    ETH_P_TEB).
 *  - TCP / UDP / UDPLITE: records source and destination ports.
 *  - anything else: BPF_DROP.
 */
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data_end = (void *)(long)skb->data_end;
	/* Each _xxx object is the fallback copy buffer handed to
	 * bpf_flow_dissect_get_header(); the matching pointer may refer
	 * either to the packet or to that buffer.
	 */
	struct icmphdr *icmp, _icmp;
	struct gre_hdr *gre, _gre;
	struct ethhdr *eth, _eth;
	struct tcphdr *tcp, _tcp;
	struct udphdr *udp, _udp;

	switch (proto) {
	case IPPROTO_ICMP:
		/* No key is filled in for ICMP; presence of the header is
		 * the only requirement.
		 */
		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
		if (!icmp)
			return export_flow_keys(keys, BPF_DROP);
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_IPIP:
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		/* Inner header is IPv4: dissect it like a fresh IP packet */
		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
	case IPPROTO_IPV6:
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		/* Inner header is IPv6 */
		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
	case IPPROTO_GRE:
		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
		if (!gre)
			return export_flow_keys(keys, BPF_DROP);

		if (bpf_htons(gre->flags & GRE_VERSION))
			/* Only inspect standard GRE packets with version 0 */
			return export_flow_keys(keys, BPF_OK);

		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
		if (GRE_IS_CSUM(gre->flags))
			keys->thoff += 4; /* Step over chksum and Padding */
		if (GRE_IS_KEY(gre->flags))
			keys->thoff += 4; /* Step over key */
		if (GRE_IS_SEQ(gre->flags))
			keys->thoff += 4; /* Step over sequence number */

		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		if (gre->proto == bpf_htons(ETH_P_TEB)) {
			/* Payload is a full Ethernet frame: read its header
			 * and continue with the inner EtherType.
			 */
			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
							  &_eth);
			if (!eth)
				return export_flow_keys(keys, BPF_DROP);

			keys->thoff += sizeof(*eth);

			return parse_eth_proto(skb, eth->h_proto);
		} else {
			/* GRE proto field is itself an EtherType */
			return parse_eth_proto(skb, gre->proto);
		}
	case IPPROTO_TCP:
		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
		if (!tcp)
			return export_flow_keys(keys, BPF_DROP);

		/* doff counts 32-bit words; fewer than 5 is rejected */
		if (tcp->doff < 5)
			return export_flow_keys(keys, BPF_DROP);

		/* NOTE(review): tcp may point at the stack copy _tcp rather
		 * than into the packet, in which case comparing it against
		 * data_end does not say anything about the packet length -
		 * confirm whether this check is still wanted in that case.
		 */
		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = tcp->source;
		keys->dport = tcp->dest;
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
		if (!udp)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = udp->source;
		keys->dport = udp->dest;
		return export_flow_keys(keys, BPF_OK);
	default:
		return export_flow_keys(keys, BPF_DROP);
	}

	/* Not reached: every case above returns */
	return export_flow_keys(keys, BPF_DROP);
}
241 
242 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
243 {
244 	struct bpf_flow_keys *keys = skb->flow_keys;
245 
246 	switch (nexthdr) {
247 	case IPPROTO_HOPOPTS:
248 	case IPPROTO_DSTOPTS:
249 		bpf_tail_call(skb, &jmp_table, IPV6OP);
250 		break;
251 	case IPPROTO_FRAGMENT:
252 		bpf_tail_call(skb, &jmp_table, IPV6FR);
253 		break;
254 	default:
255 		return parse_ip_proto(skb, nexthdr);
256 	}
257 
258 	return export_flow_keys(keys, BPF_DROP);
259 }
260 
261 PROG(IP)(struct __sk_buff *skb)
262 {
263 	void *data_end = (void *)(long)skb->data_end;
264 	struct bpf_flow_keys *keys = skb->flow_keys;
265 	void *data = (void *)(long)skb->data;
266 	struct iphdr *iph, _iph;
267 	bool done = false;
268 
269 	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
270 	if (!iph)
271 		return export_flow_keys(keys, BPF_DROP);
272 
273 	/* IP header cannot be smaller than 20 bytes */
274 	if (iph->ihl < 5)
275 		return export_flow_keys(keys, BPF_DROP);
276 
277 	keys->addr_proto = ETH_P_IP;
278 	keys->ipv4_src = iph->saddr;
279 	keys->ipv4_dst = iph->daddr;
280 	keys->ip_proto = iph->protocol;
281 
282 	keys->thoff += iph->ihl << 2;
283 	if (data + keys->thoff > data_end)
284 		return export_flow_keys(keys, BPF_DROP);
285 
286 	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
287 		keys->is_frag = true;
288 		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
289 			/* From second fragment on, packets do not have headers
290 			 * we can parse.
291 			 */
292 			done = true;
293 		} else {
294 			keys->is_first_frag = true;
295 			/* No need to parse fragmented packet unless
296 			 * explicitly asked for.
297 			 */
298 			if (!(keys->flags &
299 			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
300 				done = true;
301 		}
302 	}
303 
304 	if (done)
305 		return export_flow_keys(keys, BPF_OK);
306 
307 	return parse_ip_proto(skb, iph->protocol);
308 }
309 
310 PROG(IPV6)(struct __sk_buff *skb)
311 {
312 	struct bpf_flow_keys *keys = skb->flow_keys;
313 	struct ipv6hdr *ip6h, _ip6h;
314 
315 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
316 	if (!ip6h)
317 		return export_flow_keys(keys, BPF_DROP);
318 
319 	keys->addr_proto = ETH_P_IPV6;
320 	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
321 
322 	keys->thoff += sizeof(struct ipv6hdr);
323 	keys->ip_proto = ip6h->nexthdr;
324 	keys->flow_label = ip6_flowlabel(ip6h);
325 
326 	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
327 		return export_flow_keys(keys, BPF_OK);
328 
329 	return parse_ipv6_proto(skb, ip6h->nexthdr);
330 }
331 
332 PROG(IPV6OP)(struct __sk_buff *skb)
333 {
334 	struct bpf_flow_keys *keys = skb->flow_keys;
335 	struct ipv6_opt_hdr *ip6h, _ip6h;
336 
337 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
338 	if (!ip6h)
339 		return export_flow_keys(keys, BPF_DROP);
340 
341 	/* hlen is in 8-octets and does not include the first 8 bytes
342 	 * of the header
343 	 */
344 	keys->thoff += (1 + ip6h->hdrlen) << 3;
345 	keys->ip_proto = ip6h->nexthdr;
346 
347 	return parse_ipv6_proto(skb, ip6h->nexthdr);
348 }
349 
350 PROG(IPV6FR)(struct __sk_buff *skb)
351 {
352 	struct bpf_flow_keys *keys = skb->flow_keys;
353 	struct frag_hdr *fragh, _fragh;
354 
355 	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
356 	if (!fragh)
357 		return export_flow_keys(keys, BPF_DROP);
358 
359 	keys->thoff += sizeof(*fragh);
360 	keys->is_frag = true;
361 	keys->ip_proto = fragh->nexthdr;
362 
363 	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
364 		keys->is_first_frag = true;
365 
366 		/* No need to parse fragmented packet unless
367 		 * explicitly asked for.
368 		 */
369 		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
370 			return export_flow_keys(keys, BPF_OK);
371 	}
372 
373 	return parse_ipv6_proto(skb, fragh->nexthdr);
374 }
375 
376 PROG(MPLS)(struct __sk_buff *skb)
377 {
378 	struct bpf_flow_keys *keys = skb->flow_keys;
379 	struct mpls_label *mpls, _mpls;
380 
381 	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
382 	if (!mpls)
383 		return export_flow_keys(keys, BPF_DROP);
384 
385 	return export_flow_keys(keys, BPF_OK);
386 }
387 
388 PROG(VLAN)(struct __sk_buff *skb)
389 {
390 	struct bpf_flow_keys *keys = skb->flow_keys;
391 	struct vlan_hdr *vlan, _vlan;
392 
393 	/* Account for double-tagging */
394 	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
395 		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
396 		if (!vlan)
397 			return export_flow_keys(keys, BPF_DROP);
398 
399 		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
400 			return export_flow_keys(keys, BPF_DROP);
401 
402 		keys->nhoff += sizeof(*vlan);
403 		keys->thoff += sizeof(*vlan);
404 	}
405 
406 	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
407 	if (!vlan)
408 		return export_flow_keys(keys, BPF_DROP);
409 
410 	keys->nhoff += sizeof(*vlan);
411 	keys->thoff += sizeof(*vlan);
412 	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
413 	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
414 	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
415 		return export_flow_keys(keys, BPF_DROP);
416 
417 	keys->n_proto = vlan->h_vlan_encapsulated_proto;
418 	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
419 }
420 
/* License string emitted into the "license" ELF section. */
char __license[] SEC("license") = "GPL";
422