1 // SPDX-License-Identifier: GPL-2.0 2 #include <limits.h> 3 #include <stddef.h> 4 #include <stdbool.h> 5 #include <string.h> 6 #include <linux/pkt_cls.h> 7 #include <linux/bpf.h> 8 #include <linux/in.h> 9 #include <linux/if_ether.h> 10 #include <linux/icmp.h> 11 #include <linux/ip.h> 12 #include <linux/ipv6.h> 13 #include <linux/tcp.h> 14 #include <linux/udp.h> 15 #include <linux/if_packet.h> 16 #include <sys/socket.h> 17 #include <linux/if_tunnel.h> 18 #include <linux/mpls.h> 19 #include "bpf_helpers.h" 20 #include "bpf_endian.h" 21 22 int _version SEC("version") = 1; 23 #define PROG(F) SEC(#F) int bpf_func_##F 24 25 /* These are the identifiers of the BPF programs that will be used in tail 26 * calls. Name is limited to 16 characters, with the terminating character and 27 * bpf_func_ above, we have only 6 to work with, anything after will be cropped. 28 */ 29 enum { 30 IP, 31 IPV6, 32 IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ 33 IPV6FR, /* Fragmentation IPv6 Extension Header */ 34 MPLS, 35 VLAN, 36 }; 37 38 #define IP_MF 0x2000 39 #define IP_OFFSET 0x1FFF 40 #define IP6_MF 0x0001 41 #define IP6_OFFSET 0xFFF8 42 43 struct vlan_hdr { 44 __be16 h_vlan_TCI; 45 __be16 h_vlan_encapsulated_proto; 46 }; 47 48 struct gre_hdr { 49 __be16 flags; 50 __be16 proto; 51 }; 52 53 struct frag_hdr { 54 __u8 nexthdr; 55 __u8 reserved; 56 __be16 frag_off; 57 __be32 identification; 58 }; 59 60 struct { 61 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 62 __uint(max_entries, 8); 63 __uint(key_size, sizeof(__u32)); 64 __uint(value_size, sizeof(__u32)); 65 } jmp_table SEC(".maps"); 66 67 struct { 68 __uint(type, BPF_MAP_TYPE_HASH); 69 __uint(max_entries, 1024); 70 __type(key, __u32); 71 __type(value, struct bpf_flow_keys); 72 } last_dissection SEC(".maps"); 73 74 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, 75 int ret) 76 { 77 __u32 key = (__u32)(keys->sport) << 16 | keys->dport; 78 struct bpf_flow_keys val; 79 80 memcpy(&val, keys, sizeof(val)); 81 bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY); 82 return ret; 83 } 84 85 #define IPV6_FLOWLABEL_MASK __bpf_constant_htonl(0x000FFFFF) 86 static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) 87 { 88 return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; 89 } 90 91 static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, 92 __u16 hdr_size, 93 void *buffer) 94 { 95 void *data_end = (void *)(long)skb->data_end; 96 void *data = (void *)(long)skb->data; 97 __u16 thoff = skb->flow_keys->thoff; 98 __u8 *hdr; 99 100 /* Verifies this variable offset does not overflow */ 101 if (thoff > (USHRT_MAX - hdr_size)) 102 return NULL; 103 104 hdr = data + thoff; 105 if (hdr + hdr_size <= data_end) 106 return hdr; 107 108 if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) 109 return NULL; 110 111 return buffer; 112 } 113 114 /* Dispatches on ETHERTYPE */ 115 static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) 116 { 117 struct bpf_flow_keys *keys = skb->flow_keys; 118 119 switch (proto) { 120 case bpf_htons(ETH_P_IP): 121 bpf_tail_call(skb, &jmp_table, IP); 122 break; 123 case bpf_htons(ETH_P_IPV6): 124 bpf_tail_call(skb, &jmp_table, IPV6); 125 break; 126 case bpf_htons(ETH_P_MPLS_MC): 127 case bpf_htons(ETH_P_MPLS_UC): 128 bpf_tail_call(skb, &jmp_table, MPLS); 129 break; 130 case bpf_htons(ETH_P_8021Q): 131 case bpf_htons(ETH_P_8021AD): 132 bpf_tail_call(skb, &jmp_table, VLAN); 133 break; 134 default: 135 /* Protocol not supported */ 136 return export_flow_keys(keys, BPF_DROP); 137 } 138 139 return export_flow_keys(keys, BPF_DROP); 140 } 141 142 SEC("flow_dissector") 143 int _dissect(struct __sk_buff *skb) 144 { 145 struct bpf_flow_keys *keys = skb->flow_keys; 146 147 return parse_eth_proto(skb, keys->n_proto); 148 } 149 150 /* Parses on IPPROTO_* */ 151 static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) 152 { 153 struct bpf_flow_keys *keys = skb->flow_keys; 154 void *data_end = (void *)(long)skb->data_end; 155 struct icmphdr *icmp, _icmp; 156 struct gre_hdr *gre, _gre; 157 struct ethhdr *eth, _eth; 158 struct tcphdr *tcp, _tcp; 159 struct udphdr *udp, _udp; 160 161 switch (proto) { 162 case IPPROTO_ICMP: 163 icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); 164 if (!icmp) 165 return export_flow_keys(keys, BPF_DROP); 166 return export_flow_keys(keys, BPF_OK); 167 case IPPROTO_IPIP: 168 keys->is_encap = true; 169 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) 170 return export_flow_keys(keys, BPF_OK); 171 172 return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); 173 case IPPROTO_IPV6: 174 keys->is_encap = true; 175 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) 176 return export_flow_keys(keys, BPF_OK); 177 178 return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); 179 case IPPROTO_GRE: 180 gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); 181 if (!gre) 182 return export_flow_keys(keys, BPF_DROP); 183 184 if (bpf_htons(gre->flags & GRE_VERSION)) 185 /* Only inspect standard GRE packets with version 0 */ 186 return export_flow_keys(keys, BPF_OK); 187 188 keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ 189 if (GRE_IS_CSUM(gre->flags)) 190 keys->thoff += 4; /* Step over chksum and Padding */ 191 if (GRE_IS_KEY(gre->flags)) 192 keys->thoff += 4; /* Step over key */ 193 if (GRE_IS_SEQ(gre->flags)) 194 keys->thoff += 4; /* Step over sequence number */ 195 196 keys->is_encap = true; 197 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) 198 return export_flow_keys(keys, BPF_OK); 199 200 if (gre->proto == bpf_htons(ETH_P_TEB)) { 201 eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), 202 &_eth); 203 if (!eth) 204 return export_flow_keys(keys, BPF_DROP); 205 206 keys->thoff += sizeof(*eth); 207 208 return parse_eth_proto(skb, eth->h_proto); 209 } else { 210 return parse_eth_proto(skb, gre->proto); 211 } 212 case IPPROTO_TCP: 213 tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); 214 if (!tcp) 215 return export_flow_keys(keys, BPF_DROP); 216 217 if (tcp->doff < 5) 218 return export_flow_keys(keys, BPF_DROP); 219 220 if ((__u8 *)tcp + (tcp->doff << 2) > data_end) 221 return export_flow_keys(keys, BPF_DROP); 222 223 keys->sport = tcp->source; 224 keys->dport = tcp->dest; 225 return export_flow_keys(keys, BPF_OK); 226 case IPPROTO_UDP: 227 case IPPROTO_UDPLITE: 228 udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); 229 if (!udp) 230 return export_flow_keys(keys, BPF_DROP); 231 232 keys->sport = udp->source; 233 keys->dport = udp->dest; 234 return export_flow_keys(keys, BPF_OK); 235 default: 236 return export_flow_keys(keys, BPF_DROP); 237 } 238 239 return export_flow_keys(keys, BPF_DROP); 240 } 241 242 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) 243 { 244 struct bpf_flow_keys *keys = skb->flow_keys; 245 246 switch (nexthdr) { 247 case IPPROTO_HOPOPTS: 248 case IPPROTO_DSTOPTS: 249 bpf_tail_call(skb, &jmp_table, IPV6OP); 250 break; 251 case IPPROTO_FRAGMENT: 252 bpf_tail_call(skb, &jmp_table, IPV6FR); 253 break; 254 default: 255 return parse_ip_proto(skb, nexthdr); 256 } 257 258 return export_flow_keys(keys, BPF_DROP); 259 } 260 261 PROG(IP)(struct __sk_buff *skb) 262 { 263 void *data_end = (void *)(long)skb->data_end; 264 struct bpf_flow_keys *keys = skb->flow_keys; 265 void *data = (void *)(long)skb->data; 266 struct iphdr *iph, _iph; 267 bool done = false; 268 269 iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); 270 if (!iph) 271 return export_flow_keys(keys, BPF_DROP); 272 273 /* IP header cannot be smaller than 20 bytes */ 274 if (iph->ihl < 5) 275 return export_flow_keys(keys, BPF_DROP); 276 277 keys->addr_proto = ETH_P_IP; 278 keys->ipv4_src = iph->saddr; 279 keys->ipv4_dst = iph->daddr; 280 keys->ip_proto = iph->protocol; 281 282 keys->thoff += iph->ihl << 2; 283 if (data + keys->thoff > data_end) 284 return export_flow_keys(keys, BPF_DROP); 285 286 if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { 287 keys->is_frag = true; 288 if (iph->frag_off & bpf_htons(IP_OFFSET)) { 289 /* From second fragment on, packets do not have headers 290 * we can parse. 291 */ 292 done = true; 293 } else { 294 keys->is_first_frag = true; 295 /* No need to parse fragmented packet unless 296 * explicitly asked for. 297 */ 298 if (!(keys->flags & 299 BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) 300 done = true; 301 } 302 } 303 304 if (done) 305 return export_flow_keys(keys, BPF_OK); 306 307 return parse_ip_proto(skb, iph->protocol); 308 } 309 310 PROG(IPV6)(struct __sk_buff *skb) 311 { 312 struct bpf_flow_keys *keys = skb->flow_keys; 313 struct ipv6hdr *ip6h, _ip6h; 314 315 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 316 if (!ip6h) 317 return export_flow_keys(keys, BPF_DROP); 318 319 keys->addr_proto = ETH_P_IPV6; 320 memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); 321 322 keys->thoff += sizeof(struct ipv6hdr); 323 keys->ip_proto = ip6h->nexthdr; 324 keys->flow_label = ip6_flowlabel(ip6h); 325 326 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) 327 return export_flow_keys(keys, BPF_OK); 328 329 return parse_ipv6_proto(skb, ip6h->nexthdr); 330 } 331 332 PROG(IPV6OP)(struct __sk_buff *skb) 333 { 334 struct bpf_flow_keys *keys = skb->flow_keys; 335 struct ipv6_opt_hdr *ip6h, _ip6h; 336 337 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 338 if (!ip6h) 339 return export_flow_keys(keys, BPF_DROP); 340 341 /* hlen is in 8-octets and does not include the first 8 bytes 342 * of the header 343 */ 344 keys->thoff += (1 + ip6h->hdrlen) << 3; 345 keys->ip_proto = ip6h->nexthdr; 346 347 return parse_ipv6_proto(skb, ip6h->nexthdr); 348 } 349 350 PROG(IPV6FR)(struct __sk_buff *skb) 351 { 352 struct bpf_flow_keys *keys = skb->flow_keys; 353 struct frag_hdr *fragh, _fragh; 354 355 fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); 356 if (!fragh) 357 return export_flow_keys(keys, BPF_DROP); 358 359 keys->thoff += sizeof(*fragh); 360 keys->is_frag = true; 361 keys->ip_proto = fragh->nexthdr; 362 363 if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) { 364 keys->is_first_frag = true; 365 366 /* No need to parse fragmented packet unless 367 * explicitly asked for. 368 */ 369 if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) 370 return export_flow_keys(keys, BPF_OK); 371 } 372 373 return parse_ipv6_proto(skb, fragh->nexthdr); 374 } 375 376 PROG(MPLS)(struct __sk_buff *skb) 377 { 378 struct bpf_flow_keys *keys = skb->flow_keys; 379 struct mpls_label *mpls, _mpls; 380 381 mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); 382 if (!mpls) 383 return export_flow_keys(keys, BPF_DROP); 384 385 return export_flow_keys(keys, BPF_OK); 386 } 387 388 PROG(VLAN)(struct __sk_buff *skb) 389 { 390 struct bpf_flow_keys *keys = skb->flow_keys; 391 struct vlan_hdr *vlan, _vlan; 392 393 /* Account for double-tagging */ 394 if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { 395 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 396 if (!vlan) 397 return export_flow_keys(keys, BPF_DROP); 398 399 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) 400 return export_flow_keys(keys, BPF_DROP); 401 402 keys->nhoff += sizeof(*vlan); 403 keys->thoff += sizeof(*vlan); 404 } 405 406 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 407 if (!vlan) 408 return export_flow_keys(keys, BPF_DROP); 409 410 keys->nhoff += sizeof(*vlan); 411 keys->thoff += sizeof(*vlan); 412 /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ 413 if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || 414 vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) 415 return export_flow_keys(keys, BPF_DROP); 416 417 keys->n_proto = vlan->h_vlan_encapsulated_proto; 418 return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); 419 } 420 421 char __license[] SEC("license") = "GPL"; 422