123458901SLorenz Bauer // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 223458901SLorenz Bauer // Copyright (c) 2019, 2020 Cloudflare 323458901SLorenz Bauer 423458901SLorenz Bauer #include <stdbool.h> 523458901SLorenz Bauer #include <stddef.h> 623458901SLorenz Bauer #include <stdint.h> 723458901SLorenz Bauer #include <string.h> 823458901SLorenz Bauer 923458901SLorenz Bauer #include <linux/bpf.h> 1023458901SLorenz Bauer #include <linux/icmp.h> 1123458901SLorenz Bauer #include <linux/icmpv6.h> 1223458901SLorenz Bauer #include <linux/if_ether.h> 1323458901SLorenz Bauer #include <linux/in.h> 1423458901SLorenz Bauer #include <linux/ip.h> 1523458901SLorenz Bauer #include <linux/ipv6.h> 1623458901SLorenz Bauer #include <linux/pkt_cls.h> 1723458901SLorenz Bauer #include <linux/tcp.h> 1823458901SLorenz Bauer #include <linux/udp.h> 1923458901SLorenz Bauer 2023458901SLorenz Bauer #include <bpf/bpf_helpers.h> 2123458901SLorenz Bauer #include <bpf/bpf_endian.h> 2223458901SLorenz Bauer 2323458901SLorenz Bauer #include "test_cls_redirect.h" 2423458901SLorenz Bauer 25ee333df5SAndrii Nakryiko #ifdef SUBPROGS 26ee333df5SAndrii Nakryiko #define INLINING __noinline 27ee333df5SAndrii Nakryiko #else 28ee333df5SAndrii Nakryiko #define INLINING __always_inline 29ee333df5SAndrii Nakryiko #endif 30ee333df5SAndrii Nakryiko 3123458901SLorenz Bauer #define offsetofend(TYPE, MEMBER) \ 3223458901SLorenz Bauer (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) 3323458901SLorenz Bauer 3423458901SLorenz Bauer #define IP_OFFSET_MASK (0x1FFF) 3523458901SLorenz Bauer #define IP_MF (0x2000) 3623458901SLorenz Bauer 3723458901SLorenz Bauer char _license[] SEC("license") = "Dual BSD/GPL"; 3823458901SLorenz Bauer 3923458901SLorenz Bauer /** 4023458901SLorenz Bauer * Destination port and IP used for UDP encapsulation. 4123458901SLorenz Bauer */ 42256eab48SAndrii Nakryiko volatile const __be16 ENCAPSULATION_PORT; 43256eab48SAndrii Nakryiko volatile const __be32 ENCAPSULATION_IP; 4423458901SLorenz Bauer 4523458901SLorenz Bauer typedef struct { 4623458901SLorenz Bauer uint64_t processed_packets_total; 4723458901SLorenz Bauer uint64_t l3_protocol_packets_total_ipv4; 4823458901SLorenz Bauer uint64_t l3_protocol_packets_total_ipv6; 4923458901SLorenz Bauer uint64_t l4_protocol_packets_total_tcp; 5023458901SLorenz Bauer uint64_t l4_protocol_packets_total_udp; 5123458901SLorenz Bauer uint64_t accepted_packets_total_syn; 5223458901SLorenz Bauer uint64_t accepted_packets_total_syn_cookies; 5323458901SLorenz Bauer uint64_t accepted_packets_total_last_hop; 5423458901SLorenz Bauer uint64_t accepted_packets_total_icmp_echo_request; 5523458901SLorenz Bauer uint64_t accepted_packets_total_established; 5623458901SLorenz Bauer uint64_t forwarded_packets_total_gue; 5723458901SLorenz Bauer uint64_t forwarded_packets_total_gre; 5823458901SLorenz Bauer 5923458901SLorenz Bauer uint64_t errors_total_unknown_l3_proto; 6023458901SLorenz Bauer uint64_t errors_total_unknown_l4_proto; 6123458901SLorenz Bauer uint64_t errors_total_malformed_ip; 6223458901SLorenz Bauer uint64_t errors_total_fragmented_ip; 6323458901SLorenz Bauer uint64_t errors_total_malformed_icmp; 6423458901SLorenz Bauer uint64_t errors_total_unwanted_icmp; 6523458901SLorenz Bauer uint64_t errors_total_malformed_icmp_pkt_too_big; 6623458901SLorenz Bauer uint64_t errors_total_malformed_tcp; 6723458901SLorenz Bauer uint64_t errors_total_malformed_udp; 6823458901SLorenz Bauer uint64_t errors_total_icmp_echo_replies; 6923458901SLorenz Bauer uint64_t errors_total_malformed_encapsulation; 7023458901SLorenz Bauer uint64_t errors_total_encap_adjust_failed; 7123458901SLorenz Bauer uint64_t errors_total_encap_buffer_too_small; 7223458901SLorenz Bauer uint64_t errors_total_redirect_loop; 736b8838beSJesper Dangaard Brouer uint64_t errors_total_encap_mtu_violate; 7423458901SLorenz Bauer } metrics_t; 7523458901SLorenz Bauer 7623458901SLorenz Bauer typedef enum { 7723458901SLorenz Bauer INVALID = 0, 7823458901SLorenz Bauer UNKNOWN, 7923458901SLorenz Bauer ECHO_REQUEST, 8023458901SLorenz Bauer SYN, 8123458901SLorenz Bauer SYN_COOKIE, 8223458901SLorenz Bauer ESTABLISHED, 8323458901SLorenz Bauer } verdict_t; 8423458901SLorenz Bauer 8523458901SLorenz Bauer typedef struct { 8623458901SLorenz Bauer uint16_t src, dst; 8723458901SLorenz Bauer } flow_ports_t; 8823458901SLorenz Bauer 8923458901SLorenz Bauer _Static_assert( 9023458901SLorenz Bauer sizeof(flow_ports_t) != 9123458901SLorenz Bauer offsetofend(struct bpf_sock_tuple, ipv4.dport) - 9223458901SLorenz Bauer offsetof(struct bpf_sock_tuple, ipv4.sport) - 1, 9323458901SLorenz Bauer "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); 9423458901SLorenz Bauer _Static_assert( 9523458901SLorenz Bauer sizeof(flow_ports_t) != 9623458901SLorenz Bauer offsetofend(struct bpf_sock_tuple, ipv6.dport) - 9723458901SLorenz Bauer offsetof(struct bpf_sock_tuple, ipv6.sport) - 1, 9823458901SLorenz Bauer "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); 9923458901SLorenz Bauer 10023458901SLorenz Bauer typedef int ret_t; 10123458901SLorenz Bauer 10223458901SLorenz Bauer /* This is a bit of a hack. We need a return value which allows us to 10323458901SLorenz Bauer * indicate that the regular flow of the program should continue, 10423458901SLorenz Bauer * while allowing functions to use XDP_PASS and XDP_DROP, etc. 10523458901SLorenz Bauer */ 10623458901SLorenz Bauer static const ret_t CONTINUE_PROCESSING = -1; 10723458901SLorenz Bauer 10823458901SLorenz Bauer /* Convenience macro to call functions which return ret_t. 10923458901SLorenz Bauer */ 11023458901SLorenz Bauer #define MAYBE_RETURN(x) \ 11123458901SLorenz Bauer do { \ 11223458901SLorenz Bauer ret_t __ret = x; \ 11323458901SLorenz Bauer if (__ret != CONTINUE_PROCESSING) \ 11423458901SLorenz Bauer return __ret; \ 11523458901SLorenz Bauer } while (0) 11623458901SLorenz Bauer 11723458901SLorenz Bauer /* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes), 11823458901SLorenz Bauer * or not aligned if the arch supports efficient unaligned access. 11923458901SLorenz Bauer * 12023458901SLorenz Bauer * Since the verifier ensures that eBPF packet accesses follow these rules, 12123458901SLorenz Bauer * we can tell LLVM to emit code as if we always had a larger alignment. 12223458901SLorenz Bauer * It will yell at us if we end up on a platform where this is not valid. 12323458901SLorenz Bauer */ 12423458901SLorenz Bauer typedef uint8_t *net_ptr __attribute__((align_value(8))); 12523458901SLorenz Bauer 12623458901SLorenz Bauer typedef struct buf { 12723458901SLorenz Bauer struct __sk_buff *skb; 12823458901SLorenz Bauer net_ptr head; 12923458901SLorenz Bauer /* NB: tail musn't have alignment other than 1, otherwise 13023458901SLorenz Bauer * LLVM will go and eliminate code, e.g. when checking packet lengths. 13123458901SLorenz Bauer */ 13223458901SLorenz Bauer uint8_t *const tail; 13323458901SLorenz Bauer } buf_t; 13423458901SLorenz Bauer 135ee333df5SAndrii Nakryiko static __always_inline size_t buf_off(const buf_t *buf) 13623458901SLorenz Bauer { 13723458901SLorenz Bauer /* Clang seems to optimize constructs like 13823458901SLorenz Bauer * a - b + c 13923458901SLorenz Bauer * if c is known: 14023458901SLorenz Bauer * r? = c 14123458901SLorenz Bauer * r? -= b 14223458901SLorenz Bauer * r? += a 14323458901SLorenz Bauer * 14423458901SLorenz Bauer * This is a problem if a and b are packet pointers, 14523458901SLorenz Bauer * since the verifier allows subtracting two pointers to 14623458901SLorenz Bauer * get a scalar, but not a scalar and a pointer. 14723458901SLorenz Bauer * 14823458901SLorenz Bauer * Use inline asm to break this optimization. 14923458901SLorenz Bauer */ 15023458901SLorenz Bauer size_t off = (size_t)buf->head; 15123458901SLorenz Bauer asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data)); 15223458901SLorenz Bauer return off; 15323458901SLorenz Bauer } 15423458901SLorenz Bauer 155ee333df5SAndrii Nakryiko static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len) 15623458901SLorenz Bauer { 15723458901SLorenz Bauer if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) { 15823458901SLorenz Bauer return false; 15923458901SLorenz Bauer } 16023458901SLorenz Bauer 16123458901SLorenz Bauer buf->head += len; 16223458901SLorenz Bauer return true; 16323458901SLorenz Bauer } 16423458901SLorenz Bauer 165ee333df5SAndrii Nakryiko static __always_inline bool buf_skip(buf_t *buf, const size_t len) 16623458901SLorenz Bauer { 16723458901SLorenz Bauer /* Check whether off + len is valid in the non-linear part. */ 16823458901SLorenz Bauer if (buf_off(buf) + len > buf->skb->len) { 16923458901SLorenz Bauer return false; 17023458901SLorenz Bauer } 17123458901SLorenz Bauer 17223458901SLorenz Bauer buf->head += len; 17323458901SLorenz Bauer return true; 17423458901SLorenz Bauer } 17523458901SLorenz Bauer 17623458901SLorenz Bauer /* Returns a pointer to the start of buf, or NULL if len is 17723458901SLorenz Bauer * larger than the remaining data. Consumes len bytes on a successful 17823458901SLorenz Bauer * call. 17923458901SLorenz Bauer * 18023458901SLorenz Bauer * If scratch is not NULL, the function will attempt to load non-linear 18123458901SLorenz Bauer * data via bpf_skb_load_bytes. On success, scratch is returned. 18223458901SLorenz Bauer */ 183ee333df5SAndrii Nakryiko static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch) 18423458901SLorenz Bauer { 18523458901SLorenz Bauer if (buf->head + len > buf->tail) { 18623458901SLorenz Bauer if (scratch == NULL) { 18723458901SLorenz Bauer return NULL; 18823458901SLorenz Bauer } 18923458901SLorenz Bauer 19023458901SLorenz Bauer return buf_copy(buf, scratch, len) ? scratch : NULL; 19123458901SLorenz Bauer } 19223458901SLorenz Bauer 19323458901SLorenz Bauer void *ptr = buf->head; 19423458901SLorenz Bauer buf->head += len; 19523458901SLorenz Bauer return ptr; 19623458901SLorenz Bauer } 19723458901SLorenz Bauer 198ee333df5SAndrii Nakryiko static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) 19923458901SLorenz Bauer { 20023458901SLorenz Bauer if (ipv4->ihl <= 5) { 20123458901SLorenz Bauer return true; 20223458901SLorenz Bauer } 20323458901SLorenz Bauer 20423458901SLorenz Bauer return buf_skip(buf, (ipv4->ihl - 5) * 4); 20523458901SLorenz Bauer } 20623458901SLorenz Bauer 207ee333df5SAndrii Nakryiko static INLINING bool ipv4_is_fragment(const struct iphdr *ip) 20823458901SLorenz Bauer { 20923458901SLorenz Bauer uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); 21023458901SLorenz Bauer return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; 21123458901SLorenz Bauer } 21223458901SLorenz Bauer 213ee333df5SAndrii Nakryiko static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) 21423458901SLorenz Bauer { 21523458901SLorenz Bauer struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch); 21623458901SLorenz Bauer if (ipv4 == NULL) { 21723458901SLorenz Bauer return NULL; 21823458901SLorenz Bauer } 21923458901SLorenz Bauer 22023458901SLorenz Bauer if (ipv4->ihl < 5) { 22123458901SLorenz Bauer return NULL; 22223458901SLorenz Bauer } 22323458901SLorenz Bauer 22423458901SLorenz Bauer if (!pkt_skip_ipv4_options(pkt, ipv4)) { 22523458901SLorenz Bauer return NULL; 22623458901SLorenz Bauer } 22723458901SLorenz Bauer 22823458901SLorenz Bauer return ipv4; 22923458901SLorenz Bauer } 23023458901SLorenz Bauer 23123458901SLorenz Bauer /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ 232ee333df5SAndrii Nakryiko static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) 23323458901SLorenz Bauer { 23423458901SLorenz Bauer if (!buf_copy(pkt, ports, sizeof(*ports))) { 23523458901SLorenz Bauer return false; 23623458901SLorenz Bauer } 23723458901SLorenz Bauer 23823458901SLorenz Bauer /* Ports in the L4 headers are reversed, since we are parsing an ICMP 23923458901SLorenz Bauer * payload which is going towards the eyeball. 24023458901SLorenz Bauer */ 24123458901SLorenz Bauer uint16_t dst = ports->src; 24223458901SLorenz Bauer ports->src = ports->dst; 24323458901SLorenz Bauer ports->dst = dst; 24423458901SLorenz Bauer return true; 24523458901SLorenz Bauer } 24623458901SLorenz Bauer 247ee333df5SAndrii Nakryiko static INLINING uint16_t pkt_checksum_fold(uint32_t csum) 24823458901SLorenz Bauer { 24923458901SLorenz Bauer /* The highest reasonable value for an IPv4 header 25023458901SLorenz Bauer * checksum requires two folds, so we just do that always. 25123458901SLorenz Bauer */ 25223458901SLorenz Bauer csum = (csum & 0xffff) + (csum >> 16); 25323458901SLorenz Bauer csum = (csum & 0xffff) + (csum >> 16); 25423458901SLorenz Bauer return (uint16_t)~csum; 25523458901SLorenz Bauer } 25623458901SLorenz Bauer 257ee333df5SAndrii Nakryiko static INLINING void pkt_ipv4_checksum(struct iphdr *iph) 25823458901SLorenz Bauer { 25923458901SLorenz Bauer iph->check = 0; 26023458901SLorenz Bauer 26123458901SLorenz Bauer /* An IP header without options is 20 bytes. Two of those 26223458901SLorenz Bauer * are the checksum, which we always set to zero. Hence, 26323458901SLorenz Bauer * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7, 26423458901SLorenz Bauer * which fits in 32 bit. 26523458901SLorenz Bauer */ 26623458901SLorenz Bauer _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes"); 26723458901SLorenz Bauer uint32_t acc = 0; 26823458901SLorenz Bauer uint16_t *ipw = (uint16_t *)iph; 26923458901SLorenz Bauer 27023458901SLorenz Bauer #pragma clang loop unroll(full) 27123458901SLorenz Bauer for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) { 27223458901SLorenz Bauer acc += ipw[i]; 27323458901SLorenz Bauer } 27423458901SLorenz Bauer 27523458901SLorenz Bauer iph->check = pkt_checksum_fold(acc); 27623458901SLorenz Bauer } 27723458901SLorenz Bauer 278ee333df5SAndrii Nakryiko static INLINING 279ee333df5SAndrii Nakryiko bool pkt_skip_ipv6_extension_headers(buf_t *pkt, 28023458901SLorenz Bauer const struct ipv6hdr *ipv6, 28123458901SLorenz Bauer uint8_t *upper_proto, 28223458901SLorenz Bauer bool *is_fragment) 28323458901SLorenz Bauer { 28423458901SLorenz Bauer /* We understand five extension headers. 28523458901SLorenz Bauer * https://tools.ietf.org/html/rfc8200#section-4.1 states that all 28623458901SLorenz Bauer * headers should occur once, except Destination Options, which may 28723458901SLorenz Bauer * occur twice. Hence we give up after 6 headers. 28823458901SLorenz Bauer */ 28923458901SLorenz Bauer struct { 29023458901SLorenz Bauer uint8_t next; 29123458901SLorenz Bauer uint8_t len; 29223458901SLorenz Bauer } exthdr = { 29323458901SLorenz Bauer .next = ipv6->nexthdr, 29423458901SLorenz Bauer }; 29523458901SLorenz Bauer *is_fragment = false; 29623458901SLorenz Bauer 29723458901SLorenz Bauer #pragma clang loop unroll(full) 29823458901SLorenz Bauer for (int i = 0; i < 6; i++) { 29923458901SLorenz Bauer switch (exthdr.next) { 30023458901SLorenz Bauer case IPPROTO_FRAGMENT: 30123458901SLorenz Bauer *is_fragment = true; 30223458901SLorenz Bauer /* NB: We don't check that hdrlen == 0 as per spec. */ 30323458901SLorenz Bauer /* fallthrough; */ 30423458901SLorenz Bauer 30523458901SLorenz Bauer case IPPROTO_HOPOPTS: 30623458901SLorenz Bauer case IPPROTO_ROUTING: 30723458901SLorenz Bauer case IPPROTO_DSTOPTS: 30823458901SLorenz Bauer case IPPROTO_MH: 30923458901SLorenz Bauer if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) { 31023458901SLorenz Bauer return false; 31123458901SLorenz Bauer } 31223458901SLorenz Bauer 31323458901SLorenz Bauer /* hdrlen is in 8-octet units, and excludes the first 8 octets. */ 31423458901SLorenz Bauer if (!buf_skip(pkt, 31523458901SLorenz Bauer (exthdr.len + 1) * 8 - sizeof(exthdr))) { 31623458901SLorenz Bauer return false; 31723458901SLorenz Bauer } 31823458901SLorenz Bauer 31923458901SLorenz Bauer /* Decode next header */ 32023458901SLorenz Bauer break; 32123458901SLorenz Bauer 32223458901SLorenz Bauer default: 32323458901SLorenz Bauer /* The next header is not one of the known extension 32423458901SLorenz Bauer * headers, treat it as the upper layer header. 32523458901SLorenz Bauer * 32623458901SLorenz Bauer * This handles IPPROTO_NONE. 32723458901SLorenz Bauer * 32823458901SLorenz Bauer * Encapsulating Security Payload (50) and Authentication 32923458901SLorenz Bauer * Header (51) also end up here (and will trigger an 33023458901SLorenz Bauer * unknown proto error later). They have a custom header 33123458901SLorenz Bauer * format and seem too esoteric to care about. 33223458901SLorenz Bauer */ 33323458901SLorenz Bauer *upper_proto = exthdr.next; 33423458901SLorenz Bauer return true; 33523458901SLorenz Bauer } 33623458901SLorenz Bauer } 33723458901SLorenz Bauer 33823458901SLorenz Bauer /* We never found an upper layer header. */ 33923458901SLorenz Bauer return false; 34023458901SLorenz Bauer } 34123458901SLorenz Bauer 34223458901SLorenz Bauer /* This function has to be inlined, because the verifier otherwise rejects it 34323458901SLorenz Bauer * due to returning a pointer to the stack. This is technically correct, since 34423458901SLorenz Bauer * scratch is allocated on the stack. However, this usage should be safe since 34523458901SLorenz Bauer * it's the callers stack after all. 34623458901SLorenz Bauer */ 347ee333df5SAndrii Nakryiko static __always_inline struct ipv6hdr * 34823458901SLorenz Bauer pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, 34923458901SLorenz Bauer bool *is_fragment) 35023458901SLorenz Bauer { 35123458901SLorenz Bauer struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch); 35223458901SLorenz Bauer if (ipv6 == NULL) { 35323458901SLorenz Bauer return NULL; 35423458901SLorenz Bauer } 35523458901SLorenz Bauer 35623458901SLorenz Bauer if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) { 35723458901SLorenz Bauer return NULL; 35823458901SLorenz Bauer } 35923458901SLorenz Bauer 36023458901SLorenz Bauer return ipv6; 36123458901SLorenz Bauer } 36223458901SLorenz Bauer 36323458901SLorenz Bauer /* Global metrics, per CPU 36423458901SLorenz Bauer */ 365ee333df5SAndrii Nakryiko struct { 366ee333df5SAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 367ee333df5SAndrii Nakryiko __uint(max_entries, 1); 368ee333df5SAndrii Nakryiko __type(key, unsigned int); 369ee333df5SAndrii Nakryiko __type(value, metrics_t); 370ee333df5SAndrii Nakryiko } metrics_map SEC(".maps"); 37123458901SLorenz Bauer 372ee333df5SAndrii Nakryiko static INLINING metrics_t *get_global_metrics(void) 37323458901SLorenz Bauer { 37423458901SLorenz Bauer uint64_t key = 0; 37523458901SLorenz Bauer return bpf_map_lookup_elem(&metrics_map, &key); 37623458901SLorenz Bauer } 37723458901SLorenz Bauer 378ee333df5SAndrii Nakryiko static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) 37923458901SLorenz Bauer { 38023458901SLorenz Bauer const int payload_off = 38123458901SLorenz Bauer sizeof(*encap) + 38223458901SLorenz Bauer sizeof(struct in_addr) * encap->unigue.hop_count; 38323458901SLorenz Bauer int32_t encap_overhead = payload_off - sizeof(struct ethhdr); 38423458901SLorenz Bauer 38523458901SLorenz Bauer // Changing the ethertype if the encapsulated packet is ipv6 38623458901SLorenz Bauer if (encap->gue.proto_ctype == IPPROTO_IPV6) { 38723458901SLorenz Bauer encap->eth.h_proto = bpf_htons(ETH_P_IPV6); 38823458901SLorenz Bauer } 38923458901SLorenz Bauer 39023458901SLorenz Bauer if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC, 391c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_FIXED_GSO | 392c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_NO_CSUM_RESET) || 393c4ba153bSDaniel Borkmann bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC)) 39423458901SLorenz Bauer return TC_ACT_SHOT; 39523458901SLorenz Bauer 39623458901SLorenz Bauer return bpf_redirect(skb->ifindex, BPF_F_INGRESS); 39723458901SLorenz Bauer } 39823458901SLorenz Bauer 399ee333df5SAndrii Nakryiko static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, 40023458901SLorenz Bauer struct in_addr *next_hop, metrics_t *metrics) 40123458901SLorenz Bauer { 40223458901SLorenz Bauer metrics->forwarded_packets_total_gre++; 40323458901SLorenz Bauer 40423458901SLorenz Bauer const int payload_off = 40523458901SLorenz Bauer sizeof(*encap) + 40623458901SLorenz Bauer sizeof(struct in_addr) * encap->unigue.hop_count; 40723458901SLorenz Bauer int32_t encap_overhead = 40823458901SLorenz Bauer payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); 40923458901SLorenz Bauer int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; 41023458901SLorenz Bauer uint16_t proto = ETH_P_IP; 4116b8838beSJesper Dangaard Brouer uint32_t mtu_len = 0; 41223458901SLorenz Bauer 41323458901SLorenz Bauer /* Loop protection: the inner packet's TTL is decremented as a safeguard 41423458901SLorenz Bauer * against any forwarding loop. As the only interesting field is the TTL 41523458901SLorenz Bauer * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes 41623458901SLorenz Bauer * as they handle the split packets if needed (no need for the data to be 41723458901SLorenz Bauer * in the linear section). 41823458901SLorenz Bauer */ 41923458901SLorenz Bauer if (encap->gue.proto_ctype == IPPROTO_IPV6) { 42023458901SLorenz Bauer proto = ETH_P_IPV6; 42123458901SLorenz Bauer uint8_t ttl; 42223458901SLorenz Bauer int rc; 42323458901SLorenz Bauer 42423458901SLorenz Bauer rc = bpf_skb_load_bytes( 42523458901SLorenz Bauer skb, payload_off + offsetof(struct ipv6hdr, hop_limit), 42623458901SLorenz Bauer &ttl, 1); 42723458901SLorenz Bauer if (rc != 0) { 42823458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 42923458901SLorenz Bauer return TC_ACT_SHOT; 43023458901SLorenz Bauer } 43123458901SLorenz Bauer 43223458901SLorenz Bauer if (ttl == 0) { 43323458901SLorenz Bauer metrics->errors_total_redirect_loop++; 43423458901SLorenz Bauer return TC_ACT_SHOT; 43523458901SLorenz Bauer } 43623458901SLorenz Bauer 43723458901SLorenz Bauer ttl--; 43823458901SLorenz Bauer rc = bpf_skb_store_bytes( 43923458901SLorenz Bauer skb, payload_off + offsetof(struct ipv6hdr, hop_limit), 44023458901SLorenz Bauer &ttl, 1, 0); 44123458901SLorenz Bauer if (rc != 0) { 44223458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 44323458901SLorenz Bauer return TC_ACT_SHOT; 44423458901SLorenz Bauer } 44523458901SLorenz Bauer } else { 44623458901SLorenz Bauer uint8_t ttl; 44723458901SLorenz Bauer int rc; 44823458901SLorenz Bauer 44923458901SLorenz Bauer rc = bpf_skb_load_bytes( 45023458901SLorenz Bauer skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 45123458901SLorenz Bauer 1); 45223458901SLorenz Bauer if (rc != 0) { 45323458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 45423458901SLorenz Bauer return TC_ACT_SHOT; 45523458901SLorenz Bauer } 45623458901SLorenz Bauer 45723458901SLorenz Bauer if (ttl == 0) { 45823458901SLorenz Bauer metrics->errors_total_redirect_loop++; 45923458901SLorenz Bauer return TC_ACT_SHOT; 46023458901SLorenz Bauer } 46123458901SLorenz Bauer 46223458901SLorenz Bauer /* IPv4 also has a checksum to patch. While the TTL is only one byte, 46323458901SLorenz Bauer * this function only works for 2 and 4 bytes arguments (the result is 46423458901SLorenz Bauer * the same). 46523458901SLorenz Bauer */ 46623458901SLorenz Bauer rc = bpf_l3_csum_replace( 46723458901SLorenz Bauer skb, payload_off + offsetof(struct iphdr, check), ttl, 46823458901SLorenz Bauer ttl - 1, 2); 46923458901SLorenz Bauer if (rc != 0) { 47023458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 47123458901SLorenz Bauer return TC_ACT_SHOT; 47223458901SLorenz Bauer } 47323458901SLorenz Bauer 47423458901SLorenz Bauer ttl--; 47523458901SLorenz Bauer rc = bpf_skb_store_bytes( 47623458901SLorenz Bauer skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1, 47723458901SLorenz Bauer 0); 47823458901SLorenz Bauer if (rc != 0) { 47923458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 48023458901SLorenz Bauer return TC_ACT_SHOT; 48123458901SLorenz Bauer } 48223458901SLorenz Bauer } 48323458901SLorenz Bauer 4846b8838beSJesper Dangaard Brouer if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) { 4856b8838beSJesper Dangaard Brouer metrics->errors_total_encap_mtu_violate++; 4866b8838beSJesper Dangaard Brouer return TC_ACT_SHOT; 4876b8838beSJesper Dangaard Brouer } 4886b8838beSJesper Dangaard Brouer 48923458901SLorenz Bauer if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, 490c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_FIXED_GSO | 491c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_NO_CSUM_RESET) || 492c4ba153bSDaniel Borkmann bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) { 49323458901SLorenz Bauer metrics->errors_total_encap_adjust_failed++; 49423458901SLorenz Bauer return TC_ACT_SHOT; 49523458901SLorenz Bauer } 49623458901SLorenz Bauer 49723458901SLorenz Bauer if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) { 49823458901SLorenz Bauer metrics->errors_total_encap_buffer_too_small++; 49923458901SLorenz Bauer return TC_ACT_SHOT; 50023458901SLorenz Bauer } 50123458901SLorenz Bauer 50223458901SLorenz Bauer buf_t pkt = { 50323458901SLorenz Bauer .skb = skb, 50423458901SLorenz Bauer .head = (uint8_t *)(long)skb->data, 50523458901SLorenz Bauer .tail = (uint8_t *)(long)skb->data_end, 50623458901SLorenz Bauer }; 50723458901SLorenz Bauer 50823458901SLorenz Bauer encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL); 50923458901SLorenz Bauer if (encap_gre == NULL) { 51023458901SLorenz Bauer metrics->errors_total_encap_buffer_too_small++; 51123458901SLorenz Bauer return TC_ACT_SHOT; 51223458901SLorenz Bauer } 51323458901SLorenz Bauer 51423458901SLorenz Bauer encap_gre->ip.protocol = IPPROTO_GRE; 51523458901SLorenz Bauer encap_gre->ip.daddr = next_hop->s_addr; 51623458901SLorenz Bauer encap_gre->ip.saddr = ENCAPSULATION_IP; 51723458901SLorenz Bauer encap_gre->ip.tot_len = 51823458901SLorenz Bauer bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta); 51923458901SLorenz Bauer encap_gre->gre.flags = 0; 52023458901SLorenz Bauer encap_gre->gre.protocol = bpf_htons(proto); 52123458901SLorenz Bauer pkt_ipv4_checksum((void *)&encap_gre->ip); 52223458901SLorenz Bauer 52323458901SLorenz Bauer return bpf_redirect(skb->ifindex, 0); 52423458901SLorenz Bauer } 52523458901SLorenz Bauer 526ee333df5SAndrii Nakryiko static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, 52723458901SLorenz Bauer struct in_addr *next_hop, metrics_t *metrics) 52823458901SLorenz Bauer { 52923458901SLorenz Bauer /* swap L2 addresses */ 53023458901SLorenz Bauer /* This assumes that packets are received from a router. 53123458901SLorenz Bauer * So just swapping the MAC addresses here will make the packet go back to 53223458901SLorenz Bauer * the router, which will send it to the appropriate machine. 53323458901SLorenz Bauer */ 53423458901SLorenz Bauer unsigned char temp[ETH_ALEN]; 53523458901SLorenz Bauer memcpy(temp, encap->eth.h_dest, sizeof(temp)); 53623458901SLorenz Bauer memcpy(encap->eth.h_dest, encap->eth.h_source, 53723458901SLorenz Bauer sizeof(encap->eth.h_dest)); 53823458901SLorenz Bauer memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source)); 53923458901SLorenz Bauer 54023458901SLorenz Bauer if (encap->unigue.next_hop == encap->unigue.hop_count - 1 && 54123458901SLorenz Bauer encap->unigue.last_hop_gre) { 54223458901SLorenz Bauer return forward_with_gre(skb, encap, next_hop, metrics); 54323458901SLorenz Bauer } 54423458901SLorenz Bauer 54523458901SLorenz Bauer metrics->forwarded_packets_total_gue++; 54623458901SLorenz Bauer uint32_t old_saddr = encap->ip.saddr; 54723458901SLorenz Bauer encap->ip.saddr = encap->ip.daddr; 54823458901SLorenz Bauer encap->ip.daddr = next_hop->s_addr; 54923458901SLorenz Bauer if (encap->unigue.next_hop < encap->unigue.hop_count) { 55023458901SLorenz Bauer encap->unigue.next_hop++; 55123458901SLorenz Bauer } 55223458901SLorenz Bauer 55323458901SLorenz Bauer /* Remove ip->saddr, add next_hop->s_addr */ 55423458901SLorenz Bauer const uint64_t off = offsetof(typeof(*encap), ip.check); 55523458901SLorenz Bauer int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4); 55623458901SLorenz Bauer if (ret < 0) { 55723458901SLorenz Bauer return TC_ACT_SHOT; 55823458901SLorenz Bauer } 55923458901SLorenz Bauer 56023458901SLorenz Bauer return bpf_redirect(skb->ifindex, 0); 56123458901SLorenz Bauer } 56223458901SLorenz Bauer 563ee333df5SAndrii Nakryiko static INLINING ret_t skip_next_hops(buf_t *pkt, int n) 56423458901SLorenz Bauer { 56523458901SLorenz Bauer switch (n) { 56623458901SLorenz Bauer case 1: 56723458901SLorenz Bauer if (!buf_skip(pkt, sizeof(struct in_addr))) 56823458901SLorenz Bauer return TC_ACT_SHOT; 56923458901SLorenz Bauer case 0: 57023458901SLorenz Bauer return CONTINUE_PROCESSING; 57123458901SLorenz Bauer 57223458901SLorenz Bauer default: 57323458901SLorenz Bauer return TC_ACT_SHOT; 57423458901SLorenz Bauer } 57523458901SLorenz Bauer } 57623458901SLorenz Bauer 57723458901SLorenz Bauer /* Get the next hop from the GLB header. 57823458901SLorenz Bauer * 57923458901SLorenz Bauer * Sets next_hop->s_addr to 0 if there are no more hops left. 58023458901SLorenz Bauer * pkt is positioned just after the variable length GLB header 58123458901SLorenz Bauer * iff the call is successful. 58223458901SLorenz Bauer */ 583ee333df5SAndrii Nakryiko static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, 58423458901SLorenz Bauer struct in_addr *next_hop) 58523458901SLorenz Bauer { 58623458901SLorenz Bauer if (encap->unigue.next_hop > encap->unigue.hop_count) { 58723458901SLorenz Bauer return TC_ACT_SHOT; 58823458901SLorenz Bauer } 58923458901SLorenz Bauer 59023458901SLorenz Bauer /* Skip "used" next hops. */ 59123458901SLorenz Bauer MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop)); 59223458901SLorenz Bauer 59323458901SLorenz Bauer if (encap->unigue.next_hop == encap->unigue.hop_count) { 59423458901SLorenz Bauer /* No more next hops, we are at the end of the GLB header. */ 59523458901SLorenz Bauer next_hop->s_addr = 0; 59623458901SLorenz Bauer return CONTINUE_PROCESSING; 59723458901SLorenz Bauer } 59823458901SLorenz Bauer 59923458901SLorenz Bauer if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) { 60023458901SLorenz Bauer return TC_ACT_SHOT; 60123458901SLorenz Bauer } 60223458901SLorenz Bauer 60323458901SLorenz Bauer /* Skip the remainig next hops (may be zero). */ 60423458901SLorenz Bauer return skip_next_hops(pkt, encap->unigue.hop_count - 60523458901SLorenz Bauer encap->unigue.next_hop - 1); 60623458901SLorenz Bauer } 60723458901SLorenz Bauer 60823458901SLorenz Bauer /* Fill a bpf_sock_tuple to be used with the socket lookup functions. 60923458901SLorenz Bauer * This is a kludge that let's us work around verifier limitations: 61023458901SLorenz Bauer * 61123458901SLorenz Bauer * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) 61223458901SLorenz Bauer * 61323458901SLorenz Bauer * clang will substitue a costant for sizeof, which allows the verifier 61423458901SLorenz Bauer * to track it's value. Based on this, it can figure out the constant 61523458901SLorenz Bauer * return value, and calling code works while still being "generic" to 61623458901SLorenz Bauer * IPv4 and IPv6. 61723458901SLorenz Bauer */ 618ee333df5SAndrii Nakryiko static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, 61923458901SLorenz Bauer uint64_t iphlen, uint16_t sport, uint16_t dport) 62023458901SLorenz Bauer { 62123458901SLorenz Bauer switch (iphlen) { 62223458901SLorenz Bauer case sizeof(struct iphdr): { 62323458901SLorenz Bauer struct iphdr *ipv4 = (struct iphdr *)iph; 62423458901SLorenz Bauer tuple->ipv4.daddr = ipv4->daddr; 62523458901SLorenz Bauer tuple->ipv4.saddr = ipv4->saddr; 62623458901SLorenz Bauer tuple->ipv4.sport = sport; 62723458901SLorenz Bauer tuple->ipv4.dport = dport; 62823458901SLorenz Bauer return sizeof(tuple->ipv4); 62923458901SLorenz Bauer } 63023458901SLorenz Bauer 63123458901SLorenz Bauer case sizeof(struct ipv6hdr): { 63223458901SLorenz Bauer struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph; 63323458901SLorenz Bauer memcpy(&tuple->ipv6.daddr, &ipv6->daddr, 63423458901SLorenz Bauer sizeof(tuple->ipv6.daddr)); 63523458901SLorenz Bauer memcpy(&tuple->ipv6.saddr, &ipv6->saddr, 63623458901SLorenz Bauer sizeof(tuple->ipv6.saddr)); 63723458901SLorenz Bauer tuple->ipv6.sport = sport; 63823458901SLorenz Bauer tuple->ipv6.dport = dport; 63923458901SLorenz Bauer return sizeof(tuple->ipv6); 64023458901SLorenz Bauer } 64123458901SLorenz Bauer 64223458901SLorenz Bauer default: 64323458901SLorenz Bauer return 0; 64423458901SLorenz Bauer } 64523458901SLorenz Bauer } 64623458901SLorenz Bauer 647ee333df5SAndrii Nakryiko static INLINING verdict_t classify_tcp(struct __sk_buff *skb, 64823458901SLorenz Bauer struct bpf_sock_tuple *tuple, uint64_t tuplen, 64923458901SLorenz Bauer void *iph, struct tcphdr *tcp) 65023458901SLorenz Bauer { 65123458901SLorenz Bauer struct bpf_sock *sk = 65223458901SLorenz Bauer bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); 65323458901SLorenz Bauer if (sk == NULL) { 65423458901SLorenz Bauer return UNKNOWN; 65523458901SLorenz Bauer } 65623458901SLorenz Bauer 65723458901SLorenz Bauer if (sk->state != BPF_TCP_LISTEN) { 65823458901SLorenz Bauer bpf_sk_release(sk); 65923458901SLorenz Bauer return ESTABLISHED; 66023458901SLorenz Bauer } 66123458901SLorenz Bauer 66223458901SLorenz Bauer if (iph != NULL && tcp != NULL) { 66323458901SLorenz Bauer /* Kludge: we've run out of arguments, but need the length of the ip header. */ 66423458901SLorenz Bauer uint64_t iphlen = sizeof(struct iphdr); 66523458901SLorenz Bauer if (tuplen == sizeof(tuple->ipv6)) { 66623458901SLorenz Bauer iphlen = sizeof(struct ipv6hdr); 66723458901SLorenz Bauer } 66823458901SLorenz Bauer 66923458901SLorenz Bauer if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp, 67023458901SLorenz Bauer sizeof(*tcp)) == 0) { 67123458901SLorenz Bauer bpf_sk_release(sk); 67223458901SLorenz Bauer return SYN_COOKIE; 67323458901SLorenz Bauer } 67423458901SLorenz Bauer } 67523458901SLorenz Bauer 67623458901SLorenz Bauer bpf_sk_release(sk); 67723458901SLorenz Bauer return UNKNOWN; 67823458901SLorenz Bauer } 67923458901SLorenz Bauer 680ee333df5SAndrii Nakryiko static INLINING verdict_t classify_udp(struct __sk_buff *skb, 68123458901SLorenz Bauer struct bpf_sock_tuple *tuple, uint64_t tuplen) 68223458901SLorenz Bauer { 68323458901SLorenz Bauer struct bpf_sock *sk = 68423458901SLorenz Bauer bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); 68523458901SLorenz Bauer if (sk == NULL) { 68623458901SLorenz Bauer return UNKNOWN; 68723458901SLorenz Bauer } 68823458901SLorenz Bauer 68923458901SLorenz Bauer if (sk->state == BPF_TCP_ESTABLISHED) { 69023458901SLorenz Bauer bpf_sk_release(sk); 69123458901SLorenz Bauer return ESTABLISHED; 69223458901SLorenz Bauer } 69323458901SLorenz Bauer 69423458901SLorenz Bauer bpf_sk_release(sk); 69523458901SLorenz Bauer return UNKNOWN; 69623458901SLorenz Bauer } 69723458901SLorenz Bauer 698ee333df5SAndrii Nakryiko static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, 69923458901SLorenz Bauer struct bpf_sock_tuple *tuple, uint64_t tuplen, 70023458901SLorenz Bauer metrics_t *metrics) 70123458901SLorenz Bauer { 70223458901SLorenz Bauer switch (proto) { 70323458901SLorenz Bauer case IPPROTO_TCP: 70423458901SLorenz Bauer return classify_tcp(skb, tuple, tuplen, NULL, NULL); 70523458901SLorenz Bauer 70623458901SLorenz Bauer case IPPROTO_UDP: 70723458901SLorenz Bauer return classify_udp(skb, tuple, tuplen); 70823458901SLorenz Bauer 70923458901SLorenz Bauer default: 71023458901SLorenz Bauer metrics->errors_total_malformed_icmp++; 71123458901SLorenz Bauer return INVALID; 71223458901SLorenz Bauer } 71323458901SLorenz Bauer } 71423458901SLorenz Bauer 715ee333df5SAndrii Nakryiko static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) 71623458901SLorenz Bauer { 71723458901SLorenz Bauer struct icmphdr icmp; 71823458901SLorenz Bauer if (!buf_copy(pkt, &icmp, sizeof(icmp))) { 71923458901SLorenz Bauer metrics->errors_total_malformed_icmp++; 72023458901SLorenz Bauer return INVALID; 72123458901SLorenz Bauer } 72223458901SLorenz Bauer 72323458901SLorenz Bauer /* We should never receive encapsulated echo replies. */ 72423458901SLorenz Bauer if (icmp.type == ICMP_ECHOREPLY) { 72523458901SLorenz Bauer metrics->errors_total_icmp_echo_replies++; 72623458901SLorenz Bauer return INVALID; 72723458901SLorenz Bauer } 72823458901SLorenz Bauer 72923458901SLorenz Bauer if (icmp.type == ICMP_ECHO) { 73023458901SLorenz Bauer return ECHO_REQUEST; 73123458901SLorenz Bauer } 73223458901SLorenz Bauer 73323458901SLorenz Bauer if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) { 73423458901SLorenz Bauer metrics->errors_total_unwanted_icmp++; 73523458901SLorenz Bauer return INVALID; 73623458901SLorenz Bauer } 73723458901SLorenz Bauer 73823458901SLorenz Bauer struct iphdr _ip4; 73923458901SLorenz Bauer const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4); 74023458901SLorenz Bauer if (ipv4 == NULL) { 74123458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 74223458901SLorenz Bauer return INVALID; 74323458901SLorenz Bauer } 74423458901SLorenz Bauer 74523458901SLorenz Bauer /* The source address in the outer IP header is from the entity that 74623458901SLorenz Bauer * originated the ICMP message. Use the original IP header to restore 74723458901SLorenz Bauer * the correct flow tuple. 74823458901SLorenz Bauer */ 74923458901SLorenz Bauer struct bpf_sock_tuple tuple; 75023458901SLorenz Bauer tuple.ipv4.saddr = ipv4->daddr; 75123458901SLorenz Bauer tuple.ipv4.daddr = ipv4->saddr; 75223458901SLorenz Bauer 75323458901SLorenz Bauer if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) { 75423458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 75523458901SLorenz Bauer return INVALID; 75623458901SLorenz Bauer } 75723458901SLorenz Bauer 75823458901SLorenz Bauer return classify_icmp(pkt->skb, ipv4->protocol, &tuple, 75923458901SLorenz Bauer sizeof(tuple.ipv4), metrics); 76023458901SLorenz Bauer } 76123458901SLorenz Bauer 762ee333df5SAndrii Nakryiko static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) 76323458901SLorenz Bauer { 76423458901SLorenz Bauer struct icmp6hdr icmp6; 76523458901SLorenz Bauer if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) { 76623458901SLorenz Bauer metrics->errors_total_malformed_icmp++; 76723458901SLorenz Bauer return INVALID; 76823458901SLorenz Bauer } 76923458901SLorenz Bauer 77023458901SLorenz Bauer /* We should never receive encapsulated echo replies. */ 77123458901SLorenz Bauer if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) { 77223458901SLorenz Bauer metrics->errors_total_icmp_echo_replies++; 77323458901SLorenz Bauer return INVALID; 77423458901SLorenz Bauer } 77523458901SLorenz Bauer 77623458901SLorenz Bauer if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) { 77723458901SLorenz Bauer return ECHO_REQUEST; 77823458901SLorenz Bauer } 77923458901SLorenz Bauer 78023458901SLorenz Bauer if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) { 78123458901SLorenz Bauer metrics->errors_total_unwanted_icmp++; 78223458901SLorenz Bauer return INVALID; 78323458901SLorenz Bauer } 78423458901SLorenz Bauer 78523458901SLorenz Bauer bool is_fragment; 78623458901SLorenz Bauer uint8_t l4_proto; 78723458901SLorenz Bauer struct ipv6hdr _ipv6; 78823458901SLorenz Bauer const struct ipv6hdr *ipv6 = 78923458901SLorenz Bauer pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment); 79023458901SLorenz Bauer if (ipv6 == NULL) { 79123458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 79223458901SLorenz Bauer return INVALID; 79323458901SLorenz Bauer } 79423458901SLorenz Bauer 79523458901SLorenz Bauer if (is_fragment) { 79623458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 79723458901SLorenz Bauer return INVALID; 79823458901SLorenz Bauer } 79923458901SLorenz Bauer 80023458901SLorenz Bauer /* Swap source and dest addresses. */ 80123458901SLorenz Bauer struct bpf_sock_tuple tuple; 80223458901SLorenz Bauer memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr)); 80323458901SLorenz Bauer memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr)); 80423458901SLorenz Bauer 80523458901SLorenz Bauer if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) { 80623458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 80723458901SLorenz Bauer return INVALID; 80823458901SLorenz Bauer } 80923458901SLorenz Bauer 81023458901SLorenz Bauer return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6), 81123458901SLorenz Bauer metrics); 81223458901SLorenz Bauer } 81323458901SLorenz Bauer 814ee333df5SAndrii Nakryiko static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, 81523458901SLorenz Bauer metrics_t *metrics) 81623458901SLorenz Bauer { 81723458901SLorenz Bauer metrics->l4_protocol_packets_total_tcp++; 81823458901SLorenz Bauer 81923458901SLorenz Bauer struct tcphdr _tcp; 82023458901SLorenz Bauer struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp); 82123458901SLorenz Bauer if (tcp == NULL) { 82223458901SLorenz Bauer metrics->errors_total_malformed_tcp++; 82323458901SLorenz Bauer return INVALID; 82423458901SLorenz Bauer } 82523458901SLorenz Bauer 82623458901SLorenz Bauer if (tcp->syn) { 82723458901SLorenz Bauer return SYN; 82823458901SLorenz Bauer } 82923458901SLorenz Bauer 83023458901SLorenz Bauer struct bpf_sock_tuple tuple; 83123458901SLorenz Bauer uint64_t tuplen = 83223458901SLorenz Bauer fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest); 83323458901SLorenz Bauer return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp); 83423458901SLorenz Bauer } 83523458901SLorenz Bauer 836ee333df5SAndrii Nakryiko static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, 83723458901SLorenz Bauer metrics_t *metrics) 83823458901SLorenz Bauer { 83923458901SLorenz Bauer metrics->l4_protocol_packets_total_udp++; 84023458901SLorenz Bauer 84123458901SLorenz Bauer struct udphdr _udp; 84223458901SLorenz Bauer struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp); 84323458901SLorenz Bauer if (udph == NULL) { 84423458901SLorenz Bauer metrics->errors_total_malformed_udp++; 84523458901SLorenz Bauer return INVALID; 84623458901SLorenz Bauer } 84723458901SLorenz Bauer 84823458901SLorenz Bauer struct bpf_sock_tuple tuple; 84923458901SLorenz Bauer uint64_t tuplen = 85023458901SLorenz Bauer fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest); 85123458901SLorenz Bauer return classify_udp(pkt->skb, &tuple, tuplen); 85223458901SLorenz Bauer } 85323458901SLorenz Bauer 854ee333df5SAndrii Nakryiko static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) 85523458901SLorenz Bauer { 85623458901SLorenz Bauer metrics->l3_protocol_packets_total_ipv4++; 85723458901SLorenz Bauer 85823458901SLorenz Bauer struct iphdr _ip4; 85923458901SLorenz Bauer struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4); 86023458901SLorenz Bauer if (ipv4 == NULL) { 86123458901SLorenz Bauer metrics->errors_total_malformed_ip++; 86223458901SLorenz Bauer return INVALID; 86323458901SLorenz Bauer } 86423458901SLorenz Bauer 86523458901SLorenz Bauer if (ipv4->version != 4) { 86623458901SLorenz Bauer metrics->errors_total_malformed_ip++; 86723458901SLorenz Bauer return INVALID; 86823458901SLorenz Bauer } 86923458901SLorenz Bauer 87023458901SLorenz Bauer if (ipv4_is_fragment(ipv4)) { 87123458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 87223458901SLorenz Bauer return INVALID; 87323458901SLorenz Bauer } 87423458901SLorenz Bauer 87523458901SLorenz Bauer switch (ipv4->protocol) { 87623458901SLorenz Bauer case IPPROTO_ICMP: 87723458901SLorenz Bauer return process_icmpv4(pkt, metrics); 87823458901SLorenz Bauer 87923458901SLorenz Bauer case IPPROTO_TCP: 88023458901SLorenz Bauer return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics); 88123458901SLorenz Bauer 88223458901SLorenz Bauer case IPPROTO_UDP: 88323458901SLorenz Bauer return process_udp(pkt, ipv4, sizeof(*ipv4), metrics); 88423458901SLorenz Bauer 88523458901SLorenz Bauer default: 88623458901SLorenz Bauer metrics->errors_total_unknown_l4_proto++; 88723458901SLorenz Bauer return INVALID; 88823458901SLorenz Bauer } 88923458901SLorenz Bauer } 89023458901SLorenz Bauer 891ee333df5SAndrii Nakryiko static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) 89223458901SLorenz Bauer { 89323458901SLorenz Bauer metrics->l3_protocol_packets_total_ipv6++; 89423458901SLorenz Bauer 89523458901SLorenz Bauer uint8_t l4_proto; 89623458901SLorenz Bauer bool is_fragment; 89723458901SLorenz Bauer struct ipv6hdr _ipv6; 89823458901SLorenz Bauer struct ipv6hdr *ipv6 = 89923458901SLorenz Bauer pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment); 90023458901SLorenz Bauer if (ipv6 == NULL) { 90123458901SLorenz Bauer metrics->errors_total_malformed_ip++; 90223458901SLorenz Bauer return INVALID; 90323458901SLorenz Bauer } 90423458901SLorenz Bauer 90523458901SLorenz Bauer if (ipv6->version != 6) { 90623458901SLorenz Bauer metrics->errors_total_malformed_ip++; 90723458901SLorenz Bauer return INVALID; 90823458901SLorenz Bauer } 90923458901SLorenz Bauer 91023458901SLorenz Bauer if (is_fragment) { 91123458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 91223458901SLorenz Bauer return INVALID; 91323458901SLorenz Bauer } 91423458901SLorenz Bauer 91523458901SLorenz Bauer switch (l4_proto) { 91623458901SLorenz Bauer case IPPROTO_ICMPV6: 91723458901SLorenz Bauer return process_icmpv6(pkt, metrics); 91823458901SLorenz Bauer 91923458901SLorenz Bauer case IPPROTO_TCP: 92023458901SLorenz Bauer return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics); 92123458901SLorenz Bauer 92223458901SLorenz Bauer case IPPROTO_UDP: 92323458901SLorenz Bauer return process_udp(pkt, ipv6, sizeof(*ipv6), metrics); 92423458901SLorenz Bauer 92523458901SLorenz Bauer default: 92623458901SLorenz Bauer metrics->errors_total_unknown_l4_proto++; 92723458901SLorenz Bauer return INVALID; 92823458901SLorenz Bauer } 92923458901SLorenz Bauer } 93023458901SLorenz Bauer 931*c22bdd28SAndrii Nakryiko SEC("tc") 93223458901SLorenz Bauer int cls_redirect(struct __sk_buff *skb) 93323458901SLorenz Bauer { 93423458901SLorenz Bauer metrics_t *metrics = get_global_metrics(); 93523458901SLorenz Bauer if (metrics == NULL) { 93623458901SLorenz Bauer return TC_ACT_SHOT; 93723458901SLorenz Bauer } 93823458901SLorenz Bauer 93923458901SLorenz Bauer metrics->processed_packets_total++; 94023458901SLorenz Bauer 94123458901SLorenz Bauer /* Pass bogus packets as long as we're not sure they're 94223458901SLorenz Bauer * destined for us. 94323458901SLorenz Bauer */ 94423458901SLorenz Bauer if (skb->protocol != bpf_htons(ETH_P_IP)) { 94523458901SLorenz Bauer return TC_ACT_OK; 94623458901SLorenz Bauer } 94723458901SLorenz Bauer 94823458901SLorenz Bauer encap_headers_t *encap; 94923458901SLorenz Bauer 95023458901SLorenz Bauer /* Make sure that all encapsulation headers are available in 95123458901SLorenz Bauer * the linear portion of the skb. This makes it easy to manipulate them. 95223458901SLorenz Bauer */ 95323458901SLorenz Bauer if (bpf_skb_pull_data(skb, sizeof(*encap))) { 95423458901SLorenz Bauer return TC_ACT_OK; 95523458901SLorenz Bauer } 95623458901SLorenz Bauer 95723458901SLorenz Bauer buf_t pkt = { 95823458901SLorenz Bauer .skb = skb, 95923458901SLorenz Bauer .head = (uint8_t *)(long)skb->data, 96023458901SLorenz Bauer .tail = (uint8_t *)(long)skb->data_end, 96123458901SLorenz Bauer }; 96223458901SLorenz Bauer 96323458901SLorenz Bauer encap = buf_assign(&pkt, sizeof(*encap), NULL); 96423458901SLorenz Bauer if (encap == NULL) { 96523458901SLorenz Bauer return TC_ACT_OK; 96623458901SLorenz Bauer } 96723458901SLorenz Bauer 96823458901SLorenz Bauer if (encap->ip.ihl != 5) { 96923458901SLorenz Bauer /* We never have any options. */ 97023458901SLorenz Bauer return TC_ACT_OK; 97123458901SLorenz Bauer } 97223458901SLorenz Bauer 97323458901SLorenz Bauer if (encap->ip.daddr != ENCAPSULATION_IP || 97423458901SLorenz Bauer encap->ip.protocol != IPPROTO_UDP) { 97523458901SLorenz Bauer return TC_ACT_OK; 97623458901SLorenz Bauer } 97723458901SLorenz Bauer 97823458901SLorenz Bauer /* TODO Check UDP length? */ 97923458901SLorenz Bauer if (encap->udp.dest != ENCAPSULATION_PORT) { 98023458901SLorenz Bauer return TC_ACT_OK; 98123458901SLorenz Bauer } 98223458901SLorenz Bauer 98323458901SLorenz Bauer /* We now know that the packet is destined to us, we can 98423458901SLorenz Bauer * drop bogus ones. 98523458901SLorenz Bauer */ 98623458901SLorenz Bauer if (ipv4_is_fragment((void *)&encap->ip)) { 98723458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 98823458901SLorenz Bauer return TC_ACT_SHOT; 98923458901SLorenz Bauer } 99023458901SLorenz Bauer 99123458901SLorenz Bauer if (encap->gue.variant != 0) { 99223458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 99323458901SLorenz Bauer return TC_ACT_SHOT; 99423458901SLorenz Bauer } 99523458901SLorenz Bauer 99623458901SLorenz Bauer if (encap->gue.control != 0) { 99723458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 99823458901SLorenz Bauer return TC_ACT_SHOT; 99923458901SLorenz Bauer } 100023458901SLorenz Bauer 100123458901SLorenz Bauer if (encap->gue.flags != 0) { 100223458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 100323458901SLorenz Bauer return TC_ACT_SHOT; 100423458901SLorenz Bauer } 100523458901SLorenz Bauer 100623458901SLorenz Bauer if (encap->gue.hlen != 100723458901SLorenz Bauer sizeof(encap->unigue) / 4 + encap->unigue.hop_count) { 100823458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 100923458901SLorenz Bauer return TC_ACT_SHOT; 101023458901SLorenz Bauer } 101123458901SLorenz Bauer 101223458901SLorenz Bauer if (encap->unigue.version != 0) { 101323458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 101423458901SLorenz Bauer return TC_ACT_SHOT; 101523458901SLorenz Bauer } 101623458901SLorenz Bauer 101723458901SLorenz Bauer if (encap->unigue.reserved != 0) { 101823458901SLorenz Bauer return TC_ACT_SHOT; 101923458901SLorenz Bauer } 102023458901SLorenz Bauer 102123458901SLorenz Bauer struct in_addr next_hop; 102223458901SLorenz Bauer MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop)); 102323458901SLorenz Bauer 102423458901SLorenz Bauer if (next_hop.s_addr == 0) { 102523458901SLorenz Bauer metrics->accepted_packets_total_last_hop++; 102623458901SLorenz Bauer return accept_locally(skb, encap); 102723458901SLorenz Bauer } 102823458901SLorenz Bauer 102923458901SLorenz Bauer verdict_t verdict; 103023458901SLorenz Bauer switch (encap->gue.proto_ctype) { 103123458901SLorenz Bauer case IPPROTO_IPIP: 103223458901SLorenz Bauer verdict = process_ipv4(&pkt, metrics); 103323458901SLorenz Bauer break; 103423458901SLorenz Bauer 103523458901SLorenz Bauer case IPPROTO_IPV6: 103623458901SLorenz Bauer verdict = process_ipv6(&pkt, metrics); 103723458901SLorenz Bauer break; 103823458901SLorenz Bauer 103923458901SLorenz Bauer default: 104023458901SLorenz Bauer metrics->errors_total_unknown_l3_proto++; 104123458901SLorenz Bauer return TC_ACT_SHOT; 104223458901SLorenz Bauer } 104323458901SLorenz Bauer 104423458901SLorenz Bauer switch (verdict) { 104523458901SLorenz Bauer case INVALID: 104623458901SLorenz Bauer /* metrics have already been bumped */ 104723458901SLorenz Bauer return TC_ACT_SHOT; 104823458901SLorenz Bauer 104923458901SLorenz Bauer case UNKNOWN: 105023458901SLorenz Bauer return forward_to_next_hop(skb, encap, &next_hop, metrics); 105123458901SLorenz Bauer 105223458901SLorenz Bauer case ECHO_REQUEST: 105323458901SLorenz Bauer metrics->accepted_packets_total_icmp_echo_request++; 105423458901SLorenz Bauer break; 105523458901SLorenz Bauer 105623458901SLorenz Bauer case SYN: 105723458901SLorenz Bauer if (encap->unigue.forward_syn) { 105823458901SLorenz Bauer return forward_to_next_hop(skb, encap, &next_hop, 105923458901SLorenz Bauer metrics); 106023458901SLorenz Bauer } 106123458901SLorenz Bauer 106223458901SLorenz Bauer metrics->accepted_packets_total_syn++; 106323458901SLorenz Bauer break; 106423458901SLorenz Bauer 106523458901SLorenz Bauer case SYN_COOKIE: 106623458901SLorenz Bauer metrics->accepted_packets_total_syn_cookies++; 106723458901SLorenz Bauer break; 106823458901SLorenz Bauer 106923458901SLorenz Bauer case ESTABLISHED: 107023458901SLorenz Bauer metrics->accepted_packets_total_established++; 107123458901SLorenz Bauer break; 107223458901SLorenz Bauer } 107323458901SLorenz Bauer 107423458901SLorenz Bauer return accept_locally(skb, encap); 107523458901SLorenz Bauer } 1076