// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Cloudflare Ltd.
// Copyright (c) 2020 Isovalent, Inc.

#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";

/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	struct bpf_sock_tuple *result;
	struct ethhdr *eth;
	__u64 tuple_len;
	__u8 proto = 0;
	__u64 ihl_len;

	eth = (struct ethhdr *)(data);
	if (eth + 1 > data_end)
		return NULL;

	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));

		if (iph + 1 > data_end)
			return NULL;
		if (iph->ihl != 5)
			/* Options are not supported */
			return NULL;
		ihl_len = iph->ihl * 4;
		proto = iph->protocol;
		*ipv4 = true;
		result = (struct bpf_sock_tuple *)&iph->saddr;
	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));

		if (ip6h + 1 > data_end)
			return NULL;
		ihl_len = sizeof(*ip6h);
		proto = ip6h->nexthdr;
		*ipv4 = false;
		result = (struct bpf_sock_tuple *)&ip6h->saddr;
	} else {
		return (struct bpf_sock_tuple *)data;
	}

	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
		return NULL;

	*tcp = (proto == IPPROTO_TCP);
	return result;
}

static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
	struct bpf_sock_tuple ln = {0};
	struct bpf_sock *sk;
	size_t tuple_len;
	int ret;

	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
		return TC_ACT_SHOT;

	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
	if (sk)
		goto assign;

	if (ipv4) {
		if (tuple->ipv4.dport != bpf_htons(4321))
			return TC_ACT_OK;

		ln.ipv4.daddr = bpf_htonl(0x7f000001);
		ln.ipv4.dport = bpf_htons(1234);

		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
				       BPF_F_CURRENT_NETNS, 0);
	} else {
		if (tuple->ipv6.dport != bpf_htons(4321))
			return TC_ACT_OK;

		/* Upper parts of daddr are already zero. */
		ln.ipv6.daddr[3] = bpf_htonl(0x1);
		ln.ipv6.dport = bpf_htons(1234);

		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
				       BPF_F_CURRENT_NETNS, 0);
	}

	/* workaround: We can't do a single socket lookup here, because then
	 * the compiler will likely spill tuple_len to the stack. This makes it
	 * lose all bounds information in the verifier, which then rejects the
	 * call as unsafe.
	 */
	if (!sk)
		return TC_ACT_SHOT;

assign:
	ret = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return ret;
}
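
/* For illustration (not part of the original test): both handle_udp() above
 * and handle_tcp() below steer flows aimed at port 4321 into a socket bound
 * to 127.0.0.1:1234 (or [::1]:1234), so the redirected lookup only succeeds
 * if userspace has such a socket in the current netns. A minimal sketch of
 * the UDP peer, with hypothetical names, might look like:
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	struct sockaddr_in addr = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
 *		.sin_port = htons(1234),
 *	};
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 * After bpf_sk_assign(), packets sent to port 4321 on any address are
 * delivered to this socket even though their destination never matches the
 * socket's bound address.
 */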

static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
	struct bpf_sock_tuple ln = {0};
	struct bpf_sock *sk;
	size_t tuple_len;
	int ret;

	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
		return TC_ACT_SHOT;

	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
	if (sk) {
		if (sk->state != BPF_TCP_LISTEN)
			goto assign;
		bpf_sk_release(sk);
	}

	if (ipv4) {
		if (tuple->ipv4.dport != bpf_htons(4321))
			return TC_ACT_OK;

		ln.ipv4.daddr = bpf_htonl(0x7f000001);
		ln.ipv4.dport = bpf_htons(1234);

		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
					BPF_F_CURRENT_NETNS, 0);
	} else {
		if (tuple->ipv6.dport != bpf_htons(4321))
			return TC_ACT_OK;

		/* Upper parts of daddr are already zero. */
		ln.ipv6.daddr[3] = bpf_htonl(0x1);
		ln.ipv6.dport = bpf_htons(1234);

		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
					BPF_F_CURRENT_NETNS, 0);
	}

	/* workaround: We can't do a single socket lookup here, because then
	 * the compiler will likely spill tuple_len to the stack. This makes it
	 * lose all bounds information in the verifier, which then rejects the
	 * call as unsafe.
	 */
	if (!sk)
		return TC_ACT_SHOT;

	if (sk->state != BPF_TCP_LISTEN) {
		bpf_sk_release(sk);
		return TC_ACT_SHOT;
	}

assign:
	ret = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return ret;
}

SEC("classifier/sk_assign_test")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
	struct bpf_sock_tuple *tuple, ln = {0};
	bool ipv4 = false;
	bool tcp = false;
	int tuple_len;
	int ret = 0;

	tuple = get_tuple(skb, &ipv4, &tcp);
	if (!tuple)
		return TC_ACT_SHOT;

	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
	 * the same here, if we try to share the implementations they will
	 * fail to verify because we're crossing pointer types.
	 */
	if (tcp)
		ret = handle_tcp(skb, tuple, ipv4);
	else
		ret = handle_udp(skb, tuple, ipv4);

	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
}
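
/* Usage sketch (an assumption, not part of the test object): programs in a
 * "classifier/" section attach at the TC hook, so with iproute2 and the
 * object built as test_sk_assign.o, attachment could look like:
 *
 *	tc qdisc add dev lo clsact
 *	tc filter add dev lo ingress bpf da obj test_sk_assign.o \
 *		sec classifier/sk_assign_test
 *
 * bpf_sk_assign() is only meaningful on the TC ingress path; the in-tree
 * test harness performs the equivalent setup programmatically.
 */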