123458901SLorenz Bauer // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 223458901SLorenz Bauer // Copyright (c) 2019, 2020 Cloudflare 323458901SLorenz Bauer 423458901SLorenz Bauer #include <stdbool.h> 523458901SLorenz Bauer #include <stddef.h> 623458901SLorenz Bauer #include <stdint.h> 723458901SLorenz Bauer #include <string.h> 823458901SLorenz Bauer 923458901SLorenz Bauer #include <linux/bpf.h> 1023458901SLorenz Bauer #include <linux/icmp.h> 1123458901SLorenz Bauer #include <linux/icmpv6.h> 1223458901SLorenz Bauer #include <linux/if_ether.h> 1323458901SLorenz Bauer #include <linux/in.h> 1423458901SLorenz Bauer #include <linux/ip.h> 1523458901SLorenz Bauer #include <linux/ipv6.h> 1623458901SLorenz Bauer #include <linux/pkt_cls.h> 1723458901SLorenz Bauer #include <linux/tcp.h> 1823458901SLorenz Bauer #include <linux/udp.h> 1923458901SLorenz Bauer 2023458901SLorenz Bauer #include <bpf/bpf_helpers.h> 2123458901SLorenz Bauer #include <bpf/bpf_endian.h> 2223458901SLorenz Bauer 2323458901SLorenz Bauer #include "test_cls_redirect.h" 2423458901SLorenz Bauer 25ee333df5SAndrii Nakryiko #ifdef SUBPROGS 26ee333df5SAndrii Nakryiko #define INLINING __noinline 27ee333df5SAndrii Nakryiko #else 28ee333df5SAndrii Nakryiko #define INLINING __always_inline 29ee333df5SAndrii Nakryiko #endif 30ee333df5SAndrii Nakryiko 3123458901SLorenz Bauer #define offsetofend(TYPE, MEMBER) \ 3223458901SLorenz Bauer (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) 3323458901SLorenz Bauer 3423458901SLorenz Bauer #define IP_OFFSET_MASK (0x1FFF) 3523458901SLorenz Bauer #define IP_MF (0x2000) 3623458901SLorenz Bauer 3723458901SLorenz Bauer char _license[] SEC("license") = "Dual BSD/GPL"; 3823458901SLorenz Bauer 3923458901SLorenz Bauer /** 4023458901SLorenz Bauer * Destination port and IP used for UDP encapsulation. 4123458901SLorenz Bauer */ 4223458901SLorenz Bauer static volatile const __be16 ENCAPSULATION_PORT; 4323458901SLorenz Bauer static volatile const __be32 ENCAPSULATION_IP; 4423458901SLorenz Bauer 4523458901SLorenz Bauer typedef struct { 4623458901SLorenz Bauer uint64_t processed_packets_total; 4723458901SLorenz Bauer uint64_t l3_protocol_packets_total_ipv4; 4823458901SLorenz Bauer uint64_t l3_protocol_packets_total_ipv6; 4923458901SLorenz Bauer uint64_t l4_protocol_packets_total_tcp; 5023458901SLorenz Bauer uint64_t l4_protocol_packets_total_udp; 5123458901SLorenz Bauer uint64_t accepted_packets_total_syn; 5223458901SLorenz Bauer uint64_t accepted_packets_total_syn_cookies; 5323458901SLorenz Bauer uint64_t accepted_packets_total_last_hop; 5423458901SLorenz Bauer uint64_t accepted_packets_total_icmp_echo_request; 5523458901SLorenz Bauer uint64_t accepted_packets_total_established; 5623458901SLorenz Bauer uint64_t forwarded_packets_total_gue; 5723458901SLorenz Bauer uint64_t forwarded_packets_total_gre; 5823458901SLorenz Bauer 5923458901SLorenz Bauer uint64_t errors_total_unknown_l3_proto; 6023458901SLorenz Bauer uint64_t errors_total_unknown_l4_proto; 6123458901SLorenz Bauer uint64_t errors_total_malformed_ip; 6223458901SLorenz Bauer uint64_t errors_total_fragmented_ip; 6323458901SLorenz Bauer uint64_t errors_total_malformed_icmp; 6423458901SLorenz Bauer uint64_t errors_total_unwanted_icmp; 6523458901SLorenz Bauer uint64_t errors_total_malformed_icmp_pkt_too_big; 6623458901SLorenz Bauer uint64_t errors_total_malformed_tcp; 6723458901SLorenz Bauer uint64_t errors_total_malformed_udp; 6823458901SLorenz Bauer uint64_t errors_total_icmp_echo_replies; 6923458901SLorenz Bauer uint64_t errors_total_malformed_encapsulation; 7023458901SLorenz Bauer uint64_t errors_total_encap_adjust_failed; 7123458901SLorenz Bauer uint64_t errors_total_encap_buffer_too_small; 7223458901SLorenz Bauer uint64_t errors_total_redirect_loop; 7323458901SLorenz Bauer } metrics_t; 7423458901SLorenz Bauer 7523458901SLorenz Bauer typedef enum { 7623458901SLorenz Bauer INVALID = 0, 7723458901SLorenz Bauer UNKNOWN, 7823458901SLorenz Bauer ECHO_REQUEST, 7923458901SLorenz Bauer SYN, 8023458901SLorenz Bauer SYN_COOKIE, 8123458901SLorenz Bauer ESTABLISHED, 8223458901SLorenz Bauer } verdict_t; 8323458901SLorenz Bauer 8423458901SLorenz Bauer typedef struct { 8523458901SLorenz Bauer uint16_t src, dst; 8623458901SLorenz Bauer } flow_ports_t; 8723458901SLorenz Bauer 8823458901SLorenz Bauer _Static_assert( 8923458901SLorenz Bauer sizeof(flow_ports_t) != 9023458901SLorenz Bauer offsetofend(struct bpf_sock_tuple, ipv4.dport) - 9123458901SLorenz Bauer offsetof(struct bpf_sock_tuple, ipv4.sport) - 1, 9223458901SLorenz Bauer "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); 9323458901SLorenz Bauer _Static_assert( 9423458901SLorenz Bauer sizeof(flow_ports_t) != 9523458901SLorenz Bauer offsetofend(struct bpf_sock_tuple, ipv6.dport) - 9623458901SLorenz Bauer offsetof(struct bpf_sock_tuple, ipv6.sport) - 1, 9723458901SLorenz Bauer "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); 9823458901SLorenz Bauer 9923458901SLorenz Bauer typedef int ret_t; 10023458901SLorenz Bauer 10123458901SLorenz Bauer /* This is a bit of a hack. We need a return value which allows us to 10223458901SLorenz Bauer * indicate that the regular flow of the program should continue, 10323458901SLorenz Bauer * while allowing functions to use XDP_PASS and XDP_DROP, etc. 10423458901SLorenz Bauer */ 10523458901SLorenz Bauer static const ret_t CONTINUE_PROCESSING = -1; 10623458901SLorenz Bauer 10723458901SLorenz Bauer /* Convenience macro to call functions which return ret_t. 10823458901SLorenz Bauer */ 10923458901SLorenz Bauer #define MAYBE_RETURN(x) \ 11023458901SLorenz Bauer do { \ 11123458901SLorenz Bauer ret_t __ret = x; \ 11223458901SLorenz Bauer if (__ret != CONTINUE_PROCESSING) \ 11323458901SLorenz Bauer return __ret; \ 11423458901SLorenz Bauer } while (0) 11523458901SLorenz Bauer 11623458901SLorenz Bauer /* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes), 11723458901SLorenz Bauer * or not aligned if the arch supports efficient unaligned access. 11823458901SLorenz Bauer * 11923458901SLorenz Bauer * Since the verifier ensures that eBPF packet accesses follow these rules, 12023458901SLorenz Bauer * we can tell LLVM to emit code as if we always had a larger alignment. 12123458901SLorenz Bauer * It will yell at us if we end up on a platform where this is not valid. 12223458901SLorenz Bauer */ 12323458901SLorenz Bauer typedef uint8_t *net_ptr __attribute__((align_value(8))); 12423458901SLorenz Bauer 12523458901SLorenz Bauer typedef struct buf { 12623458901SLorenz Bauer struct __sk_buff *skb; 12723458901SLorenz Bauer net_ptr head; 12823458901SLorenz Bauer /* NB: tail musn't have alignment other than 1, otherwise 12923458901SLorenz Bauer * LLVM will go and eliminate code, e.g. when checking packet lengths. 13023458901SLorenz Bauer */ 13123458901SLorenz Bauer uint8_t *const tail; 13223458901SLorenz Bauer } buf_t; 13323458901SLorenz Bauer 134ee333df5SAndrii Nakryiko static __always_inline size_t buf_off(const buf_t *buf) 13523458901SLorenz Bauer { 13623458901SLorenz Bauer /* Clang seems to optimize constructs like 13723458901SLorenz Bauer * a - b + c 13823458901SLorenz Bauer * if c is known: 13923458901SLorenz Bauer * r? = c 14023458901SLorenz Bauer * r? -= b 14123458901SLorenz Bauer * r? += a 14223458901SLorenz Bauer * 14323458901SLorenz Bauer * This is a problem if a and b are packet pointers, 14423458901SLorenz Bauer * since the verifier allows subtracting two pointers to 14523458901SLorenz Bauer * get a scalar, but not a scalar and a pointer. 14623458901SLorenz Bauer * 14723458901SLorenz Bauer * Use inline asm to break this optimization. 14823458901SLorenz Bauer */ 14923458901SLorenz Bauer size_t off = (size_t)buf->head; 15023458901SLorenz Bauer asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data)); 15123458901SLorenz Bauer return off; 15223458901SLorenz Bauer } 15323458901SLorenz Bauer 154ee333df5SAndrii Nakryiko static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len) 15523458901SLorenz Bauer { 15623458901SLorenz Bauer if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) { 15723458901SLorenz Bauer return false; 15823458901SLorenz Bauer } 15923458901SLorenz Bauer 16023458901SLorenz Bauer buf->head += len; 16123458901SLorenz Bauer return true; 16223458901SLorenz Bauer } 16323458901SLorenz Bauer 164ee333df5SAndrii Nakryiko static __always_inline bool buf_skip(buf_t *buf, const size_t len) 16523458901SLorenz Bauer { 16623458901SLorenz Bauer /* Check whether off + len is valid in the non-linear part. */ 16723458901SLorenz Bauer if (buf_off(buf) + len > buf->skb->len) { 16823458901SLorenz Bauer return false; 16923458901SLorenz Bauer } 17023458901SLorenz Bauer 17123458901SLorenz Bauer buf->head += len; 17223458901SLorenz Bauer return true; 17323458901SLorenz Bauer } 17423458901SLorenz Bauer 17523458901SLorenz Bauer /* Returns a pointer to the start of buf, or NULL if len is 17623458901SLorenz Bauer * larger than the remaining data. Consumes len bytes on a successful 17723458901SLorenz Bauer * call. 17823458901SLorenz Bauer * 17923458901SLorenz Bauer * If scratch is not NULL, the function will attempt to load non-linear 18023458901SLorenz Bauer * data via bpf_skb_load_bytes. On success, scratch is returned. 18123458901SLorenz Bauer */ 182ee333df5SAndrii Nakryiko static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch) 18323458901SLorenz Bauer { 18423458901SLorenz Bauer if (buf->head + len > buf->tail) { 18523458901SLorenz Bauer if (scratch == NULL) { 18623458901SLorenz Bauer return NULL; 18723458901SLorenz Bauer } 18823458901SLorenz Bauer 18923458901SLorenz Bauer return buf_copy(buf, scratch, len) ? scratch : NULL; 19023458901SLorenz Bauer } 19123458901SLorenz Bauer 19223458901SLorenz Bauer void *ptr = buf->head; 19323458901SLorenz Bauer buf->head += len; 19423458901SLorenz Bauer return ptr; 19523458901SLorenz Bauer } 19623458901SLorenz Bauer 197ee333df5SAndrii Nakryiko static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) 19823458901SLorenz Bauer { 19923458901SLorenz Bauer if (ipv4->ihl <= 5) { 20023458901SLorenz Bauer return true; 20123458901SLorenz Bauer } 20223458901SLorenz Bauer 20323458901SLorenz Bauer return buf_skip(buf, (ipv4->ihl - 5) * 4); 20423458901SLorenz Bauer } 20523458901SLorenz Bauer 206ee333df5SAndrii Nakryiko static INLINING bool ipv4_is_fragment(const struct iphdr *ip) 20723458901SLorenz Bauer { 20823458901SLorenz Bauer uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); 20923458901SLorenz Bauer return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; 21023458901SLorenz Bauer } 21123458901SLorenz Bauer 212ee333df5SAndrii Nakryiko static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) 21323458901SLorenz Bauer { 21423458901SLorenz Bauer struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch); 21523458901SLorenz Bauer if (ipv4 == NULL) { 21623458901SLorenz Bauer return NULL; 21723458901SLorenz Bauer } 21823458901SLorenz Bauer 21923458901SLorenz Bauer if (ipv4->ihl < 5) { 22023458901SLorenz Bauer return NULL; 22123458901SLorenz Bauer } 22223458901SLorenz Bauer 22323458901SLorenz Bauer if (!pkt_skip_ipv4_options(pkt, ipv4)) { 22423458901SLorenz Bauer return NULL; 22523458901SLorenz Bauer } 22623458901SLorenz Bauer 22723458901SLorenz Bauer return ipv4; 22823458901SLorenz Bauer } 22923458901SLorenz Bauer 23023458901SLorenz Bauer /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ 231ee333df5SAndrii Nakryiko static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) 23223458901SLorenz Bauer { 23323458901SLorenz Bauer if (!buf_copy(pkt, ports, sizeof(*ports))) { 23423458901SLorenz Bauer return false; 23523458901SLorenz Bauer } 23623458901SLorenz Bauer 23723458901SLorenz Bauer /* Ports in the L4 headers are reversed, since we are parsing an ICMP 23823458901SLorenz Bauer * payload which is going towards the eyeball. 23923458901SLorenz Bauer */ 24023458901SLorenz Bauer uint16_t dst = ports->src; 24123458901SLorenz Bauer ports->src = ports->dst; 24223458901SLorenz Bauer ports->dst = dst; 24323458901SLorenz Bauer return true; 24423458901SLorenz Bauer } 24523458901SLorenz Bauer 246ee333df5SAndrii Nakryiko static INLINING uint16_t pkt_checksum_fold(uint32_t csum) 24723458901SLorenz Bauer { 24823458901SLorenz Bauer /* The highest reasonable value for an IPv4 header 24923458901SLorenz Bauer * checksum requires two folds, so we just do that always. 25023458901SLorenz Bauer */ 25123458901SLorenz Bauer csum = (csum & 0xffff) + (csum >> 16); 25223458901SLorenz Bauer csum = (csum & 0xffff) + (csum >> 16); 25323458901SLorenz Bauer return (uint16_t)~csum; 25423458901SLorenz Bauer } 25523458901SLorenz Bauer 256ee333df5SAndrii Nakryiko static INLINING void pkt_ipv4_checksum(struct iphdr *iph) 25723458901SLorenz Bauer { 25823458901SLorenz Bauer iph->check = 0; 25923458901SLorenz Bauer 26023458901SLorenz Bauer /* An IP header without options is 20 bytes. Two of those 26123458901SLorenz Bauer * are the checksum, which we always set to zero. Hence, 26223458901SLorenz Bauer * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7, 26323458901SLorenz Bauer * which fits in 32 bit. 26423458901SLorenz Bauer */ 26523458901SLorenz Bauer _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes"); 26623458901SLorenz Bauer uint32_t acc = 0; 26723458901SLorenz Bauer uint16_t *ipw = (uint16_t *)iph; 26823458901SLorenz Bauer 26923458901SLorenz Bauer #pragma clang loop unroll(full) 27023458901SLorenz Bauer for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) { 27123458901SLorenz Bauer acc += ipw[i]; 27223458901SLorenz Bauer } 27323458901SLorenz Bauer 27423458901SLorenz Bauer iph->check = pkt_checksum_fold(acc); 27523458901SLorenz Bauer } 27623458901SLorenz Bauer 277ee333df5SAndrii Nakryiko static INLINING 278ee333df5SAndrii Nakryiko bool pkt_skip_ipv6_extension_headers(buf_t *pkt, 27923458901SLorenz Bauer const struct ipv6hdr *ipv6, 28023458901SLorenz Bauer uint8_t *upper_proto, 28123458901SLorenz Bauer bool *is_fragment) 28223458901SLorenz Bauer { 28323458901SLorenz Bauer /* We understand five extension headers. 28423458901SLorenz Bauer * https://tools.ietf.org/html/rfc8200#section-4.1 states that all 28523458901SLorenz Bauer * headers should occur once, except Destination Options, which may 28623458901SLorenz Bauer * occur twice. Hence we give up after 6 headers. 28723458901SLorenz Bauer */ 28823458901SLorenz Bauer struct { 28923458901SLorenz Bauer uint8_t next; 29023458901SLorenz Bauer uint8_t len; 29123458901SLorenz Bauer } exthdr = { 29223458901SLorenz Bauer .next = ipv6->nexthdr, 29323458901SLorenz Bauer }; 29423458901SLorenz Bauer *is_fragment = false; 29523458901SLorenz Bauer 29623458901SLorenz Bauer #pragma clang loop unroll(full) 29723458901SLorenz Bauer for (int i = 0; i < 6; i++) { 29823458901SLorenz Bauer switch (exthdr.next) { 29923458901SLorenz Bauer case IPPROTO_FRAGMENT: 30023458901SLorenz Bauer *is_fragment = true; 30123458901SLorenz Bauer /* NB: We don't check that hdrlen == 0 as per spec. */ 30223458901SLorenz Bauer /* fallthrough; */ 30323458901SLorenz Bauer 30423458901SLorenz Bauer case IPPROTO_HOPOPTS: 30523458901SLorenz Bauer case IPPROTO_ROUTING: 30623458901SLorenz Bauer case IPPROTO_DSTOPTS: 30723458901SLorenz Bauer case IPPROTO_MH: 30823458901SLorenz Bauer if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) { 30923458901SLorenz Bauer return false; 31023458901SLorenz Bauer } 31123458901SLorenz Bauer 31223458901SLorenz Bauer /* hdrlen is in 8-octet units, and excludes the first 8 octets. */ 31323458901SLorenz Bauer if (!buf_skip(pkt, 31423458901SLorenz Bauer (exthdr.len + 1) * 8 - sizeof(exthdr))) { 31523458901SLorenz Bauer return false; 31623458901SLorenz Bauer } 31723458901SLorenz Bauer 31823458901SLorenz Bauer /* Decode next header */ 31923458901SLorenz Bauer break; 32023458901SLorenz Bauer 32123458901SLorenz Bauer default: 32223458901SLorenz Bauer /* The next header is not one of the known extension 32323458901SLorenz Bauer * headers, treat it as the upper layer header. 32423458901SLorenz Bauer * 32523458901SLorenz Bauer * This handles IPPROTO_NONE. 32623458901SLorenz Bauer * 32723458901SLorenz Bauer * Encapsulating Security Payload (50) and Authentication 32823458901SLorenz Bauer * Header (51) also end up here (and will trigger an 32923458901SLorenz Bauer * unknown proto error later). They have a custom header 33023458901SLorenz Bauer * format and seem too esoteric to care about. 33123458901SLorenz Bauer */ 33223458901SLorenz Bauer *upper_proto = exthdr.next; 33323458901SLorenz Bauer return true; 33423458901SLorenz Bauer } 33523458901SLorenz Bauer } 33623458901SLorenz Bauer 33723458901SLorenz Bauer /* We never found an upper layer header. */ 33823458901SLorenz Bauer return false; 33923458901SLorenz Bauer } 34023458901SLorenz Bauer 34123458901SLorenz Bauer /* This function has to be inlined, because the verifier otherwise rejects it 34223458901SLorenz Bauer * due to returning a pointer to the stack. This is technically correct, since 34323458901SLorenz Bauer * scratch is allocated on the stack. However, this usage should be safe since 34423458901SLorenz Bauer * it's the callers stack after all. 34523458901SLorenz Bauer */ 346ee333df5SAndrii Nakryiko static __always_inline struct ipv6hdr * 34723458901SLorenz Bauer pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, 34823458901SLorenz Bauer bool *is_fragment) 34923458901SLorenz Bauer { 35023458901SLorenz Bauer struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch); 35123458901SLorenz Bauer if (ipv6 == NULL) { 35223458901SLorenz Bauer return NULL; 35323458901SLorenz Bauer } 35423458901SLorenz Bauer 35523458901SLorenz Bauer if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) { 35623458901SLorenz Bauer return NULL; 35723458901SLorenz Bauer } 35823458901SLorenz Bauer 35923458901SLorenz Bauer return ipv6; 36023458901SLorenz Bauer } 36123458901SLorenz Bauer 36223458901SLorenz Bauer /* Global metrics, per CPU 36323458901SLorenz Bauer */ 364ee333df5SAndrii Nakryiko struct { 365ee333df5SAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 366ee333df5SAndrii Nakryiko __uint(max_entries, 1); 367ee333df5SAndrii Nakryiko __type(key, unsigned int); 368ee333df5SAndrii Nakryiko __type(value, metrics_t); 369ee333df5SAndrii Nakryiko } metrics_map SEC(".maps"); 37023458901SLorenz Bauer 371ee333df5SAndrii Nakryiko static INLINING metrics_t *get_global_metrics(void) 37223458901SLorenz Bauer { 37323458901SLorenz Bauer uint64_t key = 0; 37423458901SLorenz Bauer return bpf_map_lookup_elem(&metrics_map, &key); 37523458901SLorenz Bauer } 37623458901SLorenz Bauer 377ee333df5SAndrii Nakryiko static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) 37823458901SLorenz Bauer { 37923458901SLorenz Bauer const int payload_off = 38023458901SLorenz Bauer sizeof(*encap) + 38123458901SLorenz Bauer sizeof(struct in_addr) * encap->unigue.hop_count; 38223458901SLorenz Bauer int32_t encap_overhead = payload_off - sizeof(struct ethhdr); 38323458901SLorenz Bauer 38423458901SLorenz Bauer // Changing the ethertype if the encapsulated packet is ipv6 38523458901SLorenz Bauer if (encap->gue.proto_ctype == IPPROTO_IPV6) { 38623458901SLorenz Bauer encap->eth.h_proto = bpf_htons(ETH_P_IPV6); 38723458901SLorenz Bauer } 38823458901SLorenz Bauer 38923458901SLorenz Bauer if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC, 390c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_FIXED_GSO | 391c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_NO_CSUM_RESET) || 392c4ba153bSDaniel Borkmann bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC)) 39323458901SLorenz Bauer return TC_ACT_SHOT; 39423458901SLorenz Bauer 39523458901SLorenz Bauer return bpf_redirect(skb->ifindex, BPF_F_INGRESS); 39623458901SLorenz Bauer } 39723458901SLorenz Bauer 398ee333df5SAndrii Nakryiko static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, 39923458901SLorenz Bauer struct in_addr *next_hop, metrics_t *metrics) 40023458901SLorenz Bauer { 40123458901SLorenz Bauer metrics->forwarded_packets_total_gre++; 40223458901SLorenz Bauer 40323458901SLorenz Bauer const int payload_off = 40423458901SLorenz Bauer sizeof(*encap) + 40523458901SLorenz Bauer sizeof(struct in_addr) * encap->unigue.hop_count; 40623458901SLorenz Bauer int32_t encap_overhead = 40723458901SLorenz Bauer payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); 40823458901SLorenz Bauer int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; 40923458901SLorenz Bauer uint16_t proto = ETH_P_IP; 41023458901SLorenz Bauer 41123458901SLorenz Bauer /* Loop protection: the inner packet's TTL is decremented as a safeguard 41223458901SLorenz Bauer * against any forwarding loop. As the only interesting field is the TTL 41323458901SLorenz Bauer * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes 41423458901SLorenz Bauer * as they handle the split packets if needed (no need for the data to be 41523458901SLorenz Bauer * in the linear section). 41623458901SLorenz Bauer */ 41723458901SLorenz Bauer if (encap->gue.proto_ctype == IPPROTO_IPV6) { 41823458901SLorenz Bauer proto = ETH_P_IPV6; 41923458901SLorenz Bauer uint8_t ttl; 42023458901SLorenz Bauer int rc; 42123458901SLorenz Bauer 42223458901SLorenz Bauer rc = bpf_skb_load_bytes( 42323458901SLorenz Bauer skb, payload_off + offsetof(struct ipv6hdr, hop_limit), 42423458901SLorenz Bauer &ttl, 1); 42523458901SLorenz Bauer if (rc != 0) { 42623458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 42723458901SLorenz Bauer return TC_ACT_SHOT; 42823458901SLorenz Bauer } 42923458901SLorenz Bauer 43023458901SLorenz Bauer if (ttl == 0) { 43123458901SLorenz Bauer metrics->errors_total_redirect_loop++; 43223458901SLorenz Bauer return TC_ACT_SHOT; 43323458901SLorenz Bauer } 43423458901SLorenz Bauer 43523458901SLorenz Bauer ttl--; 43623458901SLorenz Bauer rc = bpf_skb_store_bytes( 43723458901SLorenz Bauer skb, payload_off + offsetof(struct ipv6hdr, hop_limit), 43823458901SLorenz Bauer &ttl, 1, 0); 43923458901SLorenz Bauer if (rc != 0) { 44023458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 44123458901SLorenz Bauer return TC_ACT_SHOT; 44223458901SLorenz Bauer } 44323458901SLorenz Bauer } else { 44423458901SLorenz Bauer uint8_t ttl; 44523458901SLorenz Bauer int rc; 44623458901SLorenz Bauer 44723458901SLorenz Bauer rc = bpf_skb_load_bytes( 44823458901SLorenz Bauer skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 44923458901SLorenz Bauer 1); 45023458901SLorenz Bauer if (rc != 0) { 45123458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 45223458901SLorenz Bauer return TC_ACT_SHOT; 45323458901SLorenz Bauer } 45423458901SLorenz Bauer 45523458901SLorenz Bauer if (ttl == 0) { 45623458901SLorenz Bauer metrics->errors_total_redirect_loop++; 45723458901SLorenz Bauer return TC_ACT_SHOT; 45823458901SLorenz Bauer } 45923458901SLorenz Bauer 46023458901SLorenz Bauer /* IPv4 also has a checksum to patch. While the TTL is only one byte, 46123458901SLorenz Bauer * this function only works for 2 and 4 bytes arguments (the result is 46223458901SLorenz Bauer * the same). 46323458901SLorenz Bauer */ 46423458901SLorenz Bauer rc = bpf_l3_csum_replace( 46523458901SLorenz Bauer skb, payload_off + offsetof(struct iphdr, check), ttl, 46623458901SLorenz Bauer ttl - 1, 2); 46723458901SLorenz Bauer if (rc != 0) { 46823458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 46923458901SLorenz Bauer return TC_ACT_SHOT; 47023458901SLorenz Bauer } 47123458901SLorenz Bauer 47223458901SLorenz Bauer ttl--; 47323458901SLorenz Bauer rc = bpf_skb_store_bytes( 47423458901SLorenz Bauer skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1, 47523458901SLorenz Bauer 0); 47623458901SLorenz Bauer if (rc != 0) { 47723458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 47823458901SLorenz Bauer return TC_ACT_SHOT; 47923458901SLorenz Bauer } 48023458901SLorenz Bauer } 48123458901SLorenz Bauer 48223458901SLorenz Bauer if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, 483c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_FIXED_GSO | 484c4ba153bSDaniel Borkmann BPF_F_ADJ_ROOM_NO_CSUM_RESET) || 485c4ba153bSDaniel Borkmann bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) { 48623458901SLorenz Bauer metrics->errors_total_encap_adjust_failed++; 48723458901SLorenz Bauer return TC_ACT_SHOT; 48823458901SLorenz Bauer } 48923458901SLorenz Bauer 49023458901SLorenz Bauer if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) { 49123458901SLorenz Bauer metrics->errors_total_encap_buffer_too_small++; 49223458901SLorenz Bauer return TC_ACT_SHOT; 49323458901SLorenz Bauer } 49423458901SLorenz Bauer 49523458901SLorenz Bauer buf_t pkt = { 49623458901SLorenz Bauer .skb = skb, 49723458901SLorenz Bauer .head = (uint8_t *)(long)skb->data, 49823458901SLorenz Bauer .tail = (uint8_t *)(long)skb->data_end, 49923458901SLorenz Bauer }; 50023458901SLorenz Bauer 50123458901SLorenz Bauer encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL); 50223458901SLorenz Bauer if (encap_gre == NULL) { 50323458901SLorenz Bauer metrics->errors_total_encap_buffer_too_small++; 50423458901SLorenz Bauer return TC_ACT_SHOT; 50523458901SLorenz Bauer } 50623458901SLorenz Bauer 50723458901SLorenz Bauer encap_gre->ip.protocol = IPPROTO_GRE; 50823458901SLorenz Bauer encap_gre->ip.daddr = next_hop->s_addr; 50923458901SLorenz Bauer encap_gre->ip.saddr = ENCAPSULATION_IP; 51023458901SLorenz Bauer encap_gre->ip.tot_len = 51123458901SLorenz Bauer bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta); 51223458901SLorenz Bauer encap_gre->gre.flags = 0; 51323458901SLorenz Bauer encap_gre->gre.protocol = bpf_htons(proto); 51423458901SLorenz Bauer pkt_ipv4_checksum((void *)&encap_gre->ip); 51523458901SLorenz Bauer 51623458901SLorenz Bauer return bpf_redirect(skb->ifindex, 0); 51723458901SLorenz Bauer } 51823458901SLorenz Bauer 519ee333df5SAndrii Nakryiko static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, 52023458901SLorenz Bauer struct in_addr *next_hop, metrics_t *metrics) 52123458901SLorenz Bauer { 52223458901SLorenz Bauer /* swap L2 addresses */ 52323458901SLorenz Bauer /* This assumes that packets are received from a router. 52423458901SLorenz Bauer * So just swapping the MAC addresses here will make the packet go back to 52523458901SLorenz Bauer * the router, which will send it to the appropriate machine. 52623458901SLorenz Bauer */ 52723458901SLorenz Bauer unsigned char temp[ETH_ALEN]; 52823458901SLorenz Bauer memcpy(temp, encap->eth.h_dest, sizeof(temp)); 52923458901SLorenz Bauer memcpy(encap->eth.h_dest, encap->eth.h_source, 53023458901SLorenz Bauer sizeof(encap->eth.h_dest)); 53123458901SLorenz Bauer memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source)); 53223458901SLorenz Bauer 53323458901SLorenz Bauer if (encap->unigue.next_hop == encap->unigue.hop_count - 1 && 53423458901SLorenz Bauer encap->unigue.last_hop_gre) { 53523458901SLorenz Bauer return forward_with_gre(skb, encap, next_hop, metrics); 53623458901SLorenz Bauer } 53723458901SLorenz Bauer 53823458901SLorenz Bauer metrics->forwarded_packets_total_gue++; 53923458901SLorenz Bauer uint32_t old_saddr = encap->ip.saddr; 54023458901SLorenz Bauer encap->ip.saddr = encap->ip.daddr; 54123458901SLorenz Bauer encap->ip.daddr = next_hop->s_addr; 54223458901SLorenz Bauer if (encap->unigue.next_hop < encap->unigue.hop_count) { 54323458901SLorenz Bauer encap->unigue.next_hop++; 54423458901SLorenz Bauer } 54523458901SLorenz Bauer 54623458901SLorenz Bauer /* Remove ip->saddr, add next_hop->s_addr */ 54723458901SLorenz Bauer const uint64_t off = offsetof(typeof(*encap), ip.check); 54823458901SLorenz Bauer int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4); 54923458901SLorenz Bauer if (ret < 0) { 55023458901SLorenz Bauer return TC_ACT_SHOT; 55123458901SLorenz Bauer } 55223458901SLorenz Bauer 55323458901SLorenz Bauer return bpf_redirect(skb->ifindex, 0); 55423458901SLorenz Bauer } 55523458901SLorenz Bauer 556ee333df5SAndrii Nakryiko static INLINING ret_t skip_next_hops(buf_t *pkt, int n) 55723458901SLorenz Bauer { 55823458901SLorenz Bauer switch (n) { 55923458901SLorenz Bauer case 1: 56023458901SLorenz Bauer if (!buf_skip(pkt, sizeof(struct in_addr))) 56123458901SLorenz Bauer return TC_ACT_SHOT; 56223458901SLorenz Bauer case 0: 56323458901SLorenz Bauer return CONTINUE_PROCESSING; 56423458901SLorenz Bauer 56523458901SLorenz Bauer default: 56623458901SLorenz Bauer return TC_ACT_SHOT; 56723458901SLorenz Bauer } 56823458901SLorenz Bauer } 56923458901SLorenz Bauer 57023458901SLorenz Bauer /* Get the next hop from the GLB header. 57123458901SLorenz Bauer * 57223458901SLorenz Bauer * Sets next_hop->s_addr to 0 if there are no more hops left. 57323458901SLorenz Bauer * pkt is positioned just after the variable length GLB header 57423458901SLorenz Bauer * iff the call is successful. 57523458901SLorenz Bauer */ 576ee333df5SAndrii Nakryiko static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, 57723458901SLorenz Bauer struct in_addr *next_hop) 57823458901SLorenz Bauer { 57923458901SLorenz Bauer if (encap->unigue.next_hop > encap->unigue.hop_count) { 58023458901SLorenz Bauer return TC_ACT_SHOT; 58123458901SLorenz Bauer } 58223458901SLorenz Bauer 58323458901SLorenz Bauer /* Skip "used" next hops. */ 58423458901SLorenz Bauer MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop)); 58523458901SLorenz Bauer 58623458901SLorenz Bauer if (encap->unigue.next_hop == encap->unigue.hop_count) { 58723458901SLorenz Bauer /* No more next hops, we are at the end of the GLB header. */ 58823458901SLorenz Bauer next_hop->s_addr = 0; 58923458901SLorenz Bauer return CONTINUE_PROCESSING; 59023458901SLorenz Bauer } 59123458901SLorenz Bauer 59223458901SLorenz Bauer if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) { 59323458901SLorenz Bauer return TC_ACT_SHOT; 59423458901SLorenz Bauer } 59523458901SLorenz Bauer 59623458901SLorenz Bauer /* Skip the remainig next hops (may be zero). */ 59723458901SLorenz Bauer return skip_next_hops(pkt, encap->unigue.hop_count - 59823458901SLorenz Bauer encap->unigue.next_hop - 1); 59923458901SLorenz Bauer } 60023458901SLorenz Bauer 60123458901SLorenz Bauer /* Fill a bpf_sock_tuple to be used with the socket lookup functions. 60223458901SLorenz Bauer * This is a kludge that let's us work around verifier limitations: 60323458901SLorenz Bauer * 60423458901SLorenz Bauer * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) 60523458901SLorenz Bauer * 60623458901SLorenz Bauer * clang will substitue a costant for sizeof, which allows the verifier 60723458901SLorenz Bauer * to track it's value. Based on this, it can figure out the constant 60823458901SLorenz Bauer * return value, and calling code works while still being "generic" to 60923458901SLorenz Bauer * IPv4 and IPv6. 61023458901SLorenz Bauer */ 611ee333df5SAndrii Nakryiko static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, 61223458901SLorenz Bauer uint64_t iphlen, uint16_t sport, uint16_t dport) 61323458901SLorenz Bauer { 61423458901SLorenz Bauer switch (iphlen) { 61523458901SLorenz Bauer case sizeof(struct iphdr): { 61623458901SLorenz Bauer struct iphdr *ipv4 = (struct iphdr *)iph; 61723458901SLorenz Bauer tuple->ipv4.daddr = ipv4->daddr; 61823458901SLorenz Bauer tuple->ipv4.saddr = ipv4->saddr; 61923458901SLorenz Bauer tuple->ipv4.sport = sport; 62023458901SLorenz Bauer tuple->ipv4.dport = dport; 62123458901SLorenz Bauer return sizeof(tuple->ipv4); 62223458901SLorenz Bauer } 62323458901SLorenz Bauer 62423458901SLorenz Bauer case sizeof(struct ipv6hdr): { 62523458901SLorenz Bauer struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph; 62623458901SLorenz Bauer memcpy(&tuple->ipv6.daddr, &ipv6->daddr, 62723458901SLorenz Bauer sizeof(tuple->ipv6.daddr)); 62823458901SLorenz Bauer memcpy(&tuple->ipv6.saddr, &ipv6->saddr, 62923458901SLorenz Bauer sizeof(tuple->ipv6.saddr)); 63023458901SLorenz Bauer tuple->ipv6.sport = sport; 63123458901SLorenz Bauer tuple->ipv6.dport = dport; 63223458901SLorenz Bauer return sizeof(tuple->ipv6); 63323458901SLorenz Bauer } 63423458901SLorenz Bauer 63523458901SLorenz Bauer default: 63623458901SLorenz Bauer return 0; 63723458901SLorenz Bauer } 63823458901SLorenz Bauer } 63923458901SLorenz Bauer 640ee333df5SAndrii Nakryiko static INLINING verdict_t classify_tcp(struct __sk_buff *skb, 64123458901SLorenz Bauer struct bpf_sock_tuple *tuple, uint64_t tuplen, 64223458901SLorenz Bauer void *iph, struct tcphdr *tcp) 64323458901SLorenz Bauer { 64423458901SLorenz Bauer struct bpf_sock *sk = 64523458901SLorenz Bauer bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); 64623458901SLorenz Bauer if (sk == NULL) { 64723458901SLorenz Bauer return UNKNOWN; 64823458901SLorenz Bauer } 64923458901SLorenz Bauer 65023458901SLorenz Bauer if (sk->state != BPF_TCP_LISTEN) { 65123458901SLorenz Bauer bpf_sk_release(sk); 65223458901SLorenz Bauer return ESTABLISHED; 65323458901SLorenz Bauer } 65423458901SLorenz Bauer 65523458901SLorenz Bauer if (iph != NULL && tcp != NULL) { 65623458901SLorenz Bauer /* Kludge: we've run out of arguments, but need the length of the ip header. */ 65723458901SLorenz Bauer uint64_t iphlen = sizeof(struct iphdr); 65823458901SLorenz Bauer if (tuplen == sizeof(tuple->ipv6)) { 65923458901SLorenz Bauer iphlen = sizeof(struct ipv6hdr); 66023458901SLorenz Bauer } 66123458901SLorenz Bauer 66223458901SLorenz Bauer if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp, 66323458901SLorenz Bauer sizeof(*tcp)) == 0) { 66423458901SLorenz Bauer bpf_sk_release(sk); 66523458901SLorenz Bauer return SYN_COOKIE; 66623458901SLorenz Bauer } 66723458901SLorenz Bauer } 66823458901SLorenz Bauer 66923458901SLorenz Bauer bpf_sk_release(sk); 67023458901SLorenz Bauer return UNKNOWN; 67123458901SLorenz Bauer } 67223458901SLorenz Bauer 673ee333df5SAndrii Nakryiko static INLINING verdict_t classify_udp(struct __sk_buff *skb, 67423458901SLorenz Bauer struct bpf_sock_tuple *tuple, uint64_t tuplen) 67523458901SLorenz Bauer { 67623458901SLorenz Bauer struct bpf_sock *sk = 67723458901SLorenz Bauer bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); 67823458901SLorenz Bauer if (sk == NULL) { 67923458901SLorenz Bauer return UNKNOWN; 68023458901SLorenz Bauer } 68123458901SLorenz Bauer 68223458901SLorenz Bauer if (sk->state == BPF_TCP_ESTABLISHED) { 68323458901SLorenz Bauer bpf_sk_release(sk); 68423458901SLorenz Bauer return ESTABLISHED; 68523458901SLorenz Bauer } 68623458901SLorenz Bauer 68723458901SLorenz Bauer bpf_sk_release(sk); 68823458901SLorenz Bauer return UNKNOWN; 68923458901SLorenz Bauer } 69023458901SLorenz Bauer 691ee333df5SAndrii Nakryiko static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, 69223458901SLorenz Bauer struct bpf_sock_tuple *tuple, uint64_t tuplen, 69323458901SLorenz Bauer metrics_t *metrics) 69423458901SLorenz Bauer { 69523458901SLorenz Bauer switch (proto) { 69623458901SLorenz Bauer case IPPROTO_TCP: 69723458901SLorenz Bauer return classify_tcp(skb, tuple, tuplen, NULL, NULL); 69823458901SLorenz Bauer 69923458901SLorenz Bauer case IPPROTO_UDP: 70023458901SLorenz Bauer return classify_udp(skb, tuple, tuplen); 70123458901SLorenz Bauer 70223458901SLorenz Bauer default: 70323458901SLorenz Bauer metrics->errors_total_malformed_icmp++; 70423458901SLorenz Bauer return INVALID; 70523458901SLorenz Bauer } 70623458901SLorenz Bauer } 70723458901SLorenz Bauer 708ee333df5SAndrii Nakryiko static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) 70923458901SLorenz Bauer { 71023458901SLorenz Bauer struct icmphdr icmp; 71123458901SLorenz Bauer if (!buf_copy(pkt, &icmp, sizeof(icmp))) { 71223458901SLorenz Bauer metrics->errors_total_malformed_icmp++; 71323458901SLorenz Bauer return INVALID; 71423458901SLorenz Bauer } 71523458901SLorenz Bauer 71623458901SLorenz Bauer /* We should never receive encapsulated echo replies. */ 71723458901SLorenz Bauer if (icmp.type == ICMP_ECHOREPLY) { 71823458901SLorenz Bauer metrics->errors_total_icmp_echo_replies++; 71923458901SLorenz Bauer return INVALID; 72023458901SLorenz Bauer } 72123458901SLorenz Bauer 72223458901SLorenz Bauer if (icmp.type == ICMP_ECHO) { 72323458901SLorenz Bauer return ECHO_REQUEST; 72423458901SLorenz Bauer } 72523458901SLorenz Bauer 72623458901SLorenz Bauer if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) { 72723458901SLorenz Bauer metrics->errors_total_unwanted_icmp++; 72823458901SLorenz Bauer return INVALID; 72923458901SLorenz Bauer } 73023458901SLorenz Bauer 73123458901SLorenz Bauer struct iphdr _ip4; 73223458901SLorenz Bauer const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4); 73323458901SLorenz Bauer if (ipv4 == NULL) { 73423458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 73523458901SLorenz Bauer return INVALID; 73623458901SLorenz Bauer } 73723458901SLorenz Bauer 73823458901SLorenz Bauer /* The source address in the outer IP header is from the entity that 73923458901SLorenz Bauer * originated the ICMP message. Use the original IP header to restore 74023458901SLorenz Bauer * the correct flow tuple. 74123458901SLorenz Bauer */ 74223458901SLorenz Bauer struct bpf_sock_tuple tuple; 74323458901SLorenz Bauer tuple.ipv4.saddr = ipv4->daddr; 74423458901SLorenz Bauer tuple.ipv4.daddr = ipv4->saddr; 74523458901SLorenz Bauer 74623458901SLorenz Bauer if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) { 74723458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 74823458901SLorenz Bauer return INVALID; 74923458901SLorenz Bauer } 75023458901SLorenz Bauer 75123458901SLorenz Bauer return classify_icmp(pkt->skb, ipv4->protocol, &tuple, 75223458901SLorenz Bauer sizeof(tuple.ipv4), metrics); 75323458901SLorenz Bauer } 75423458901SLorenz Bauer 755ee333df5SAndrii Nakryiko static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) 75623458901SLorenz Bauer { 75723458901SLorenz Bauer struct icmp6hdr icmp6; 75823458901SLorenz Bauer if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) { 75923458901SLorenz Bauer metrics->errors_total_malformed_icmp++; 76023458901SLorenz Bauer return INVALID; 76123458901SLorenz Bauer } 76223458901SLorenz Bauer 76323458901SLorenz Bauer /* We should never receive encapsulated echo replies. */ 76423458901SLorenz Bauer if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) { 76523458901SLorenz Bauer metrics->errors_total_icmp_echo_replies++; 76623458901SLorenz Bauer return INVALID; 76723458901SLorenz Bauer } 76823458901SLorenz Bauer 76923458901SLorenz Bauer if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) { 77023458901SLorenz Bauer return ECHO_REQUEST; 77123458901SLorenz Bauer } 77223458901SLorenz Bauer 77323458901SLorenz Bauer if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) { 77423458901SLorenz Bauer metrics->errors_total_unwanted_icmp++; 77523458901SLorenz Bauer return INVALID; 77623458901SLorenz Bauer } 77723458901SLorenz Bauer 77823458901SLorenz Bauer bool is_fragment; 77923458901SLorenz Bauer uint8_t l4_proto; 78023458901SLorenz Bauer struct ipv6hdr _ipv6; 78123458901SLorenz Bauer const struct ipv6hdr *ipv6 = 78223458901SLorenz Bauer pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment); 78323458901SLorenz Bauer if (ipv6 == NULL) { 78423458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 78523458901SLorenz Bauer return INVALID; 78623458901SLorenz Bauer } 78723458901SLorenz Bauer 78823458901SLorenz Bauer if (is_fragment) { 78923458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 79023458901SLorenz Bauer return INVALID; 79123458901SLorenz Bauer } 79223458901SLorenz Bauer 79323458901SLorenz Bauer /* Swap source and dest addresses. */ 79423458901SLorenz Bauer struct bpf_sock_tuple tuple; 79523458901SLorenz Bauer memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr)); 79623458901SLorenz Bauer memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr)); 79723458901SLorenz Bauer 79823458901SLorenz Bauer if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) { 79923458901SLorenz Bauer metrics->errors_total_malformed_icmp_pkt_too_big++; 80023458901SLorenz Bauer return INVALID; 80123458901SLorenz Bauer } 80223458901SLorenz Bauer 80323458901SLorenz Bauer return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6), 80423458901SLorenz Bauer metrics); 80523458901SLorenz Bauer } 80623458901SLorenz Bauer 807ee333df5SAndrii Nakryiko static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, 80823458901SLorenz Bauer metrics_t *metrics) 80923458901SLorenz Bauer { 81023458901SLorenz Bauer metrics->l4_protocol_packets_total_tcp++; 81123458901SLorenz Bauer 81223458901SLorenz Bauer struct tcphdr _tcp; 81323458901SLorenz Bauer struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp); 81423458901SLorenz Bauer if (tcp == NULL) { 81523458901SLorenz Bauer metrics->errors_total_malformed_tcp++; 81623458901SLorenz Bauer return INVALID; 81723458901SLorenz Bauer } 81823458901SLorenz Bauer 81923458901SLorenz Bauer if (tcp->syn) { 82023458901SLorenz Bauer return SYN; 82123458901SLorenz Bauer } 82223458901SLorenz Bauer 82323458901SLorenz Bauer struct bpf_sock_tuple tuple; 82423458901SLorenz Bauer uint64_t tuplen = 82523458901SLorenz Bauer fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest); 82623458901SLorenz Bauer return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp); 82723458901SLorenz Bauer } 82823458901SLorenz Bauer 829ee333df5SAndrii Nakryiko static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, 83023458901SLorenz Bauer metrics_t *metrics) 83123458901SLorenz Bauer { 83223458901SLorenz Bauer metrics->l4_protocol_packets_total_udp++; 83323458901SLorenz Bauer 83423458901SLorenz Bauer struct udphdr _udp; 83523458901SLorenz Bauer struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp); 83623458901SLorenz Bauer if (udph == NULL) { 83723458901SLorenz Bauer metrics->errors_total_malformed_udp++; 83823458901SLorenz Bauer return INVALID; 83923458901SLorenz Bauer } 84023458901SLorenz Bauer 84123458901SLorenz Bauer struct bpf_sock_tuple tuple; 84223458901SLorenz Bauer uint64_t tuplen = 84323458901SLorenz Bauer fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest); 84423458901SLorenz Bauer return classify_udp(pkt->skb, &tuple, tuplen); 84523458901SLorenz Bauer } 84623458901SLorenz Bauer 847ee333df5SAndrii Nakryiko static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) 84823458901SLorenz Bauer { 84923458901SLorenz Bauer metrics->l3_protocol_packets_total_ipv4++; 85023458901SLorenz Bauer 85123458901SLorenz Bauer struct iphdr _ip4; 85223458901SLorenz Bauer struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4); 85323458901SLorenz Bauer if (ipv4 == NULL) { 85423458901SLorenz Bauer metrics->errors_total_malformed_ip++; 85523458901SLorenz Bauer return INVALID; 85623458901SLorenz Bauer } 85723458901SLorenz Bauer 85823458901SLorenz Bauer if (ipv4->version != 4) { 85923458901SLorenz Bauer metrics->errors_total_malformed_ip++; 86023458901SLorenz Bauer return INVALID; 86123458901SLorenz Bauer } 86223458901SLorenz Bauer 86323458901SLorenz Bauer if (ipv4_is_fragment(ipv4)) { 86423458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 86523458901SLorenz Bauer return INVALID; 86623458901SLorenz Bauer } 86723458901SLorenz Bauer 86823458901SLorenz Bauer switch (ipv4->protocol) { 86923458901SLorenz Bauer case IPPROTO_ICMP: 87023458901SLorenz Bauer return process_icmpv4(pkt, metrics); 87123458901SLorenz Bauer 87223458901SLorenz Bauer case IPPROTO_TCP: 87323458901SLorenz Bauer return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics); 87423458901SLorenz Bauer 87523458901SLorenz Bauer case IPPROTO_UDP: 87623458901SLorenz Bauer return process_udp(pkt, ipv4, sizeof(*ipv4), metrics); 87723458901SLorenz Bauer 87823458901SLorenz Bauer default: 87923458901SLorenz Bauer metrics->errors_total_unknown_l4_proto++; 88023458901SLorenz Bauer return INVALID; 88123458901SLorenz Bauer } 88223458901SLorenz Bauer } 88323458901SLorenz Bauer 884ee333df5SAndrii Nakryiko static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) 88523458901SLorenz Bauer { 88623458901SLorenz Bauer metrics->l3_protocol_packets_total_ipv6++; 88723458901SLorenz Bauer 88823458901SLorenz Bauer uint8_t l4_proto; 88923458901SLorenz Bauer bool is_fragment; 89023458901SLorenz Bauer struct ipv6hdr _ipv6; 89123458901SLorenz Bauer struct ipv6hdr *ipv6 = 89223458901SLorenz Bauer pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment); 89323458901SLorenz Bauer if (ipv6 == NULL) { 89423458901SLorenz Bauer metrics->errors_total_malformed_ip++; 89523458901SLorenz Bauer return INVALID; 89623458901SLorenz Bauer } 89723458901SLorenz Bauer 89823458901SLorenz Bauer if (ipv6->version != 6) { 89923458901SLorenz Bauer metrics->errors_total_malformed_ip++; 90023458901SLorenz Bauer return INVALID; 90123458901SLorenz Bauer } 90223458901SLorenz Bauer 90323458901SLorenz Bauer if (is_fragment) { 90423458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 90523458901SLorenz Bauer return INVALID; 90623458901SLorenz Bauer } 90723458901SLorenz Bauer 90823458901SLorenz Bauer switch (l4_proto) { 90923458901SLorenz Bauer case IPPROTO_ICMPV6: 91023458901SLorenz Bauer return process_icmpv6(pkt, metrics); 91123458901SLorenz Bauer 91223458901SLorenz Bauer case IPPROTO_TCP: 91323458901SLorenz Bauer return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics); 91423458901SLorenz Bauer 91523458901SLorenz Bauer case IPPROTO_UDP: 91623458901SLorenz Bauer return process_udp(pkt, ipv6, sizeof(*ipv6), metrics); 91723458901SLorenz Bauer 91823458901SLorenz Bauer default: 91923458901SLorenz Bauer metrics->errors_total_unknown_l4_proto++; 92023458901SLorenz Bauer return INVALID; 92123458901SLorenz Bauer } 92223458901SLorenz Bauer } 92323458901SLorenz Bauer 92423458901SLorenz Bauer SEC("classifier/cls_redirect") 92523458901SLorenz Bauer int cls_redirect(struct __sk_buff *skb) 92623458901SLorenz Bauer { 92723458901SLorenz Bauer metrics_t *metrics = get_global_metrics(); 92823458901SLorenz Bauer if (metrics == NULL) { 92923458901SLorenz Bauer return TC_ACT_SHOT; 93023458901SLorenz Bauer } 93123458901SLorenz Bauer 93223458901SLorenz Bauer metrics->processed_packets_total++; 93323458901SLorenz Bauer 93423458901SLorenz Bauer /* Pass bogus packets as long as we're not sure they're 93523458901SLorenz Bauer * destined for us. 93623458901SLorenz Bauer */ 93723458901SLorenz Bauer if (skb->protocol != bpf_htons(ETH_P_IP)) { 93823458901SLorenz Bauer return TC_ACT_OK; 93923458901SLorenz Bauer } 94023458901SLorenz Bauer 94123458901SLorenz Bauer encap_headers_t *encap; 94223458901SLorenz Bauer 94323458901SLorenz Bauer /* Make sure that all encapsulation headers are available in 94423458901SLorenz Bauer * the linear portion of the skb. This makes it easy to manipulate them. 94523458901SLorenz Bauer */ 94623458901SLorenz Bauer if (bpf_skb_pull_data(skb, sizeof(*encap))) { 94723458901SLorenz Bauer return TC_ACT_OK; 94823458901SLorenz Bauer } 94923458901SLorenz Bauer 95023458901SLorenz Bauer buf_t pkt = { 95123458901SLorenz Bauer .skb = skb, 95223458901SLorenz Bauer .head = (uint8_t *)(long)skb->data, 95323458901SLorenz Bauer .tail = (uint8_t *)(long)skb->data_end, 95423458901SLorenz Bauer }; 95523458901SLorenz Bauer 95623458901SLorenz Bauer encap = buf_assign(&pkt, sizeof(*encap), NULL); 95723458901SLorenz Bauer if (encap == NULL) { 95823458901SLorenz Bauer return TC_ACT_OK; 95923458901SLorenz Bauer } 96023458901SLorenz Bauer 96123458901SLorenz Bauer if (encap->ip.ihl != 5) { 96223458901SLorenz Bauer /* We never have any options. */ 96323458901SLorenz Bauer return TC_ACT_OK; 96423458901SLorenz Bauer } 96523458901SLorenz Bauer 96623458901SLorenz Bauer if (encap->ip.daddr != ENCAPSULATION_IP || 96723458901SLorenz Bauer encap->ip.protocol != IPPROTO_UDP) { 96823458901SLorenz Bauer return TC_ACT_OK; 96923458901SLorenz Bauer } 97023458901SLorenz Bauer 97123458901SLorenz Bauer /* TODO Check UDP length? */ 97223458901SLorenz Bauer if (encap->udp.dest != ENCAPSULATION_PORT) { 97323458901SLorenz Bauer return TC_ACT_OK; 97423458901SLorenz Bauer } 97523458901SLorenz Bauer 97623458901SLorenz Bauer /* We now know that the packet is destined to us, we can 97723458901SLorenz Bauer * drop bogus ones. 97823458901SLorenz Bauer */ 97923458901SLorenz Bauer if (ipv4_is_fragment((void *)&encap->ip)) { 98023458901SLorenz Bauer metrics->errors_total_fragmented_ip++; 98123458901SLorenz Bauer return TC_ACT_SHOT; 98223458901SLorenz Bauer } 98323458901SLorenz Bauer 98423458901SLorenz Bauer if (encap->gue.variant != 0) { 98523458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 98623458901SLorenz Bauer return TC_ACT_SHOT; 98723458901SLorenz Bauer } 98823458901SLorenz Bauer 98923458901SLorenz Bauer if (encap->gue.control != 0) { 99023458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 99123458901SLorenz Bauer return TC_ACT_SHOT; 99223458901SLorenz Bauer } 99323458901SLorenz Bauer 99423458901SLorenz Bauer if (encap->gue.flags != 0) { 99523458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 99623458901SLorenz Bauer return TC_ACT_SHOT; 99723458901SLorenz Bauer } 99823458901SLorenz Bauer 99923458901SLorenz Bauer if (encap->gue.hlen != 100023458901SLorenz Bauer sizeof(encap->unigue) / 4 + encap->unigue.hop_count) { 100123458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 100223458901SLorenz Bauer return TC_ACT_SHOT; 100323458901SLorenz Bauer } 100423458901SLorenz Bauer 100523458901SLorenz Bauer if (encap->unigue.version != 0) { 100623458901SLorenz Bauer metrics->errors_total_malformed_encapsulation++; 100723458901SLorenz Bauer return TC_ACT_SHOT; 100823458901SLorenz Bauer } 100923458901SLorenz Bauer 101023458901SLorenz Bauer if (encap->unigue.reserved != 0) { 101123458901SLorenz Bauer return TC_ACT_SHOT; 101223458901SLorenz Bauer } 101323458901SLorenz Bauer 101423458901SLorenz Bauer struct in_addr next_hop; 101523458901SLorenz Bauer MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop)); 101623458901SLorenz Bauer 101723458901SLorenz Bauer if (next_hop.s_addr == 0) { 101823458901SLorenz Bauer metrics->accepted_packets_total_last_hop++; 101923458901SLorenz Bauer return accept_locally(skb, encap); 102023458901SLorenz Bauer } 102123458901SLorenz Bauer 102223458901SLorenz Bauer verdict_t verdict; 102323458901SLorenz Bauer switch (encap->gue.proto_ctype) { 102423458901SLorenz Bauer case IPPROTO_IPIP: 102523458901SLorenz Bauer verdict = process_ipv4(&pkt, metrics); 102623458901SLorenz Bauer break; 102723458901SLorenz Bauer 102823458901SLorenz Bauer case IPPROTO_IPV6: 102923458901SLorenz Bauer verdict = process_ipv6(&pkt, metrics); 103023458901SLorenz Bauer break; 103123458901SLorenz Bauer 103223458901SLorenz Bauer default: 103323458901SLorenz Bauer metrics->errors_total_unknown_l3_proto++; 103423458901SLorenz Bauer return TC_ACT_SHOT; 103523458901SLorenz Bauer } 103623458901SLorenz Bauer 103723458901SLorenz Bauer switch (verdict) { 103823458901SLorenz Bauer case INVALID: 103923458901SLorenz Bauer /* metrics have already been bumped */ 104023458901SLorenz Bauer return TC_ACT_SHOT; 104123458901SLorenz Bauer 104223458901SLorenz Bauer case UNKNOWN: 104323458901SLorenz Bauer return forward_to_next_hop(skb, encap, &next_hop, metrics); 104423458901SLorenz Bauer 104523458901SLorenz Bauer case ECHO_REQUEST: 104623458901SLorenz Bauer metrics->accepted_packets_total_icmp_echo_request++; 104723458901SLorenz Bauer break; 104823458901SLorenz Bauer 104923458901SLorenz Bauer case SYN: 105023458901SLorenz Bauer if (encap->unigue.forward_syn) { 105123458901SLorenz Bauer return forward_to_next_hop(skb, encap, &next_hop, 105223458901SLorenz Bauer metrics); 105323458901SLorenz Bauer } 105423458901SLorenz Bauer 105523458901SLorenz Bauer metrics->accepted_packets_total_syn++; 105623458901SLorenz Bauer break; 105723458901SLorenz Bauer 105823458901SLorenz Bauer case SYN_COOKIE: 105923458901SLorenz Bauer metrics->accepted_packets_total_syn_cookies++; 106023458901SLorenz Bauer break; 106123458901SLorenz Bauer 106223458901SLorenz Bauer case ESTABLISHED: 106323458901SLorenz Bauer metrics->accepted_packets_total_established++; 106423458901SLorenz Bauer break; 106523458901SLorenz Bauer } 106623458901SLorenz Bauer 106723458901SLorenz Bauer return accept_locally(skb, encap); 106823458901SLorenz Bauer } 1069