1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2019 Cloudflare Ltd. 3 // Copyright (c) 2020 Isovalent, Inc. 4 5 #include <stddef.h> 6 #include <stdbool.h> 7 #include <string.h> 8 #include <linux/bpf.h> 9 #include <linux/if_ether.h> 10 #include <linux/in.h> 11 #include <linux/ip.h> 12 #include <linux/ipv6.h> 13 #include <linux/pkt_cls.h> 14 #include <linux/tcp.h> 15 #include <sys/socket.h> 16 #include <bpf/bpf_helpers.h> 17 #include <bpf/bpf_endian.h> 18 #include "bpf_misc.h" 19 20 #if defined(IPROUTE2_HAVE_LIBBPF) 21 /* Use a new-style map definition. */ 22 struct { 23 __uint(type, BPF_MAP_TYPE_SOCKMAP); 24 __type(key, int); 25 __type(value, __u64); 26 __uint(pinning, LIBBPF_PIN_BY_NAME); 27 __uint(max_entries, 1); 28 } server_map SEC(".maps"); 29 #else 30 /* Pin map under /sys/fs/bpf/tc/globals/<map name> */ 31 #define PIN_GLOBAL_NS 2 32 33 /* Must match struct bpf_elf_map layout from iproute2 */ 34 struct { 35 __u32 type; 36 __u32 size_key; 37 __u32 size_value; 38 __u32 max_elem; 39 __u32 flags; 40 __u32 id; 41 __u32 pinning; 42 } server_map SEC("maps") = { 43 .type = BPF_MAP_TYPE_SOCKMAP, 44 .size_key = sizeof(int), 45 .size_value = sizeof(__u64), 46 .max_elem = 1, 47 .pinning = PIN_GLOBAL_NS, 48 }; 49 #endif 50 51 char _license[] SEC("license") = "GPL"; 52 53 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ 54 static inline struct bpf_sock_tuple * 55 get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) 56 { 57 void *data_end = (void *)(long)skb->data_end; 58 void *data = (void *)(long)skb->data; 59 struct bpf_sock_tuple *result; 60 struct ethhdr *eth; 61 __u8 proto = 0; 62 __u64 ihl_len; 63 64 eth = (struct ethhdr *)(data); 65 if (eth + 1 > data_end) 66 return NULL; 67 68 if (eth->h_proto == bpf_htons(ETH_P_IP)) { 69 struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth)); 70 71 if (iph + 1 > data_end) 72 return NULL; 73 if (iph->ihl != 5) 74 /* Options are not supported */ 75 return NULL; 76 ihl_len = iph->ihl * 4; 77 proto = iph->protocol; 78 *ipv4 = true; 79 result = (struct bpf_sock_tuple *)&iph->saddr; 80 } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { 81 struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth)); 82 83 if (ip6h + 1 > data_end) 84 return NULL; 85 ihl_len = sizeof(*ip6h); 86 proto = ip6h->nexthdr; 87 *ipv4 = false; 88 result = (struct bpf_sock_tuple *)&ip6h->saddr; 89 } else { 90 return (struct bpf_sock_tuple *)data; 91 } 92 93 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) 94 return NULL; 95 96 *tcp = (proto == IPPROTO_TCP); 97 __sink(ihl_len); 98 return result; 99 } 100 101 static inline int 102 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) 103 { 104 struct bpf_sock *sk; 105 const int zero = 0; 106 size_t tuple_len; 107 __be16 dport; 108 int ret; 109 110 tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); 111 if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) 112 return TC_ACT_SHOT; 113 114 sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); 115 if (sk) 116 goto assign; 117 118 dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; 119 if (dport != bpf_htons(4321)) 120 return TC_ACT_OK; 121 122 sk = bpf_map_lookup_elem(&server_map, &zero); 123 if (!sk) 124 return TC_ACT_SHOT; 125 126 assign: 127 ret = bpf_sk_assign(skb, sk, 0); 128 bpf_sk_release(sk); 129 return ret; 130 } 131 132 static inline int 133 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) 134 { 135 struct bpf_sock *sk; 136 const int zero = 0; 137 size_t tuple_len; 138 __be16 dport; 139 int ret; 140 141 tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); 142 if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) 143 return TC_ACT_SHOT; 144 145 sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); 146 if (sk) { 147 if (sk->state != BPF_TCP_LISTEN) 148 goto assign; 149 bpf_sk_release(sk); 150 } 151 152 dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; 153 if (dport != bpf_htons(4321)) 154 return TC_ACT_OK; 155 156 sk = bpf_map_lookup_elem(&server_map, &zero); 157 if (!sk) 158 return TC_ACT_SHOT; 159 160 if (sk->state != BPF_TCP_LISTEN) { 161 bpf_sk_release(sk); 162 return TC_ACT_SHOT; 163 } 164 165 assign: 166 ret = bpf_sk_assign(skb, sk, 0); 167 bpf_sk_release(sk); 168 return ret; 169 } 170 171 SEC("tc") 172 int bpf_sk_assign_test(struct __sk_buff *skb) 173 { 174 struct bpf_sock_tuple *tuple; 175 bool ipv4 = false; 176 bool tcp = false; 177 int ret = 0; 178 179 tuple = get_tuple(skb, &ipv4, &tcp); 180 if (!tuple) 181 return TC_ACT_SHOT; 182 183 /* Note that the verifier socket return type for bpf_skc_lookup_tcp() 184 * differs from bpf_sk_lookup_udp(), so even though the C-level type is 185 * the same here, if we try to share the implementations they will 186 * fail to verify because we're crossing pointer types. 187 */ 188 if (tcp) 189 ret = handle_tcp(skb, tuple, ipv4); 190 else 191 ret = handle_udp(skb, tuple, ipv4); 192 193 return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; 194 } 195