// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Cloudflare Ltd.
// Copyright (c) 2020 Isovalent, Inc.

#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
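
/* Steer flows destined to port 4321 towards a local server socket bound to
 * 127.0.0.1:1234 (or [::1]:1234), via bpf_sk_assign(), for both TCP and UDP.
 */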

/* Fill 'tuple' with L3 info, and attempt to find L4. On failure, return
 * NULL; for non-IP traffic, return the start of the packet data instead so
 * that the caller does not drop it.
 */
static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	struct bpf_sock_tuple *result;
	struct ethhdr *eth;
	__u8 proto = 0;

	eth = (struct ethhdr *)(data);
	if (eth + 1 > data_end)
		return NULL;

	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));

		if (iph + 1 > data_end)
			return NULL;
		if (iph->ihl != 5)
			/* Options are not supported */
			return NULL;
		proto = iph->protocol;
		*ipv4 = true;
		result = (struct bpf_sock_tuple *)&iph->saddr;
	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));

		if (ip6h + 1 > data_end)
			return NULL;
		proto = ip6h->nexthdr;
		*ipv4 = false;
		result = (struct bpf_sock_tuple *)&ip6h->saddr;
	} else {
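		/* Not IPv4/IPv6: return the packet data itself so that the
		 * caller's NULL check does not drop non-IP (e.g. ARP)
		 * traffic.
		 */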
		return (struct bpf_sock_tuple *)data;
	}

	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
		return NULL;

	*tcp = (proto == IPPROTO_TCP);
	return result;
}

static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
	struct bpf_sock_tuple ln = {0};
	struct bpf_sock *sk;
	size_t tuple_len;
	int ret;

	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
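	/* Re-validate the tuple pointer against data_end so that the
	 * verifier can prove that reading tuple_len bytes through it stays
	 * inside the packet.
	 */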
	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
		return TC_ACT_SHOT;

	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
	if (sk)
		goto assign;

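	/* No socket matched the packet's own tuple: redirect flows destined
	 * to port 4321 towards the local server on 127.0.0.1:1234 or
	 * [::1]:1234.
	 */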
	if (ipv4) {
		if (tuple->ipv4.dport != bpf_htons(4321))
			return TC_ACT_OK;

		ln.ipv4.daddr = bpf_htonl(0x7f000001);
		ln.ipv4.dport = bpf_htons(1234);

		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
					BPF_F_CURRENT_NETNS, 0);
	} else {
		if (tuple->ipv6.dport != bpf_htons(4321))
			return TC_ACT_OK;

		/* Upper parts of daddr are already zero. */
		ln.ipv6.daddr[3] = bpf_htonl(0x1);
		ln.ipv6.dport = bpf_htons(1234);

		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
					BPF_F_CURRENT_NETNS, 0);
	}

	/* Workaround: we can't do a single socket lookup here, because the
	 * compiler would likely spill tuple_len to the stack. That loses all
	 * bounds information in the verifier, which then rejects the call as
	 * unsafe.
	 */
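	/* For illustration only, the rejected single-lookup variant would
	 * look roughly like this (hypothetical; not compiled here):
	 *
	 *	tuple_len = ipv4 ? sizeof(ln.ipv4) : sizeof(ln.ipv6);
	 *	sk = bpf_sk_lookup_udp(skb, &ln, tuple_len,
	 *			       BPF_F_CURRENT_NETNS, 0);
	 *
	 * Once tuple_len has been spilled to the stack, the verifier can no
	 * longer prove it is bounded by sizeof(ln), so the program fails to
	 * load.
	 */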
	if (!sk)
		return TC_ACT_SHOT;

assign:
	ret = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return ret;
}

static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
	struct bpf_sock_tuple ln = {0};
	struct bpf_sock *sk;
	size_t tuple_len;
	int ret;

	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
		return TC_ACT_SHOT;

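	/* An established socket found on the packet's own tuple is assigned
	 * directly; a listener is released, so that the redirected lookup
	 * below can pick up the server's listening socket instead.
	 */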
	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
	if (sk) {
		if (sk->state != BPF_TCP_LISTEN)
			goto assign;
		bpf_sk_release(sk);
	}

	if (ipv4) {
		if (tuple->ipv4.dport != bpf_htons(4321))
			return TC_ACT_OK;

		ln.ipv4.daddr = bpf_htonl(0x7f000001);
		ln.ipv4.dport = bpf_htons(1234);

		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
					BPF_F_CURRENT_NETNS, 0);
	} else {
		if (tuple->ipv6.dport != bpf_htons(4321))
			return TC_ACT_OK;

		/* Upper parts of daddr are already zero. */
		ln.ipv6.daddr[3] = bpf_htonl(0x1);
		ln.ipv6.dport = bpf_htons(1234);

		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
					BPF_F_CURRENT_NETNS, 0);
	}

	/* Workaround: same reasoning as in handle_udp() above; a single
	 * lookup with a variable tuple_len would fail to verify.
	 */
	if (!sk)
		return TC_ACT_SHOT;

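	/* The redirected lookup must land on the server's listening socket;
	 * any other state means there is nothing valid to assign, so drop.
	 */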
	if (sk->state != BPF_TCP_LISTEN) {
		bpf_sk_release(sk);
		return TC_ACT_SHOT;
	}

assign:
	ret = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return ret;
}

SEC("classifier/sk_assign_test")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
	struct bpf_sock_tuple *tuple;
	bool ipv4 = false;
	bool tcp = false;
	int ret = 0;

	tuple = get_tuple(skb, &ipv4, &tcp);
	if (!tuple)
		return TC_ACT_SHOT;

	/* Note that the verifier's socket return type for bpf_skc_lookup_tcp()
	 * differs from that of bpf_sk_lookup_udp(), so even though the C-level
	 * type is the same here, sharing one implementation between TCP and
	 * UDP would fail to verify because of the crossed pointer types.
	 */
	if (tcp)
		ret = handle_tcp(skb, tuple, ipv4);
	else
		ret = handle_udp(skb, tuple, ipv4);

	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
}
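
/* Example of building and attaching this classifier with tc (a sketch; the
 * object file name and the use of the loopback device are assumptions):
 *
 *	# clang -O2 -g -target bpf -c test_sk_assign.c -o test_sk_assign.o
 *	# tc qdisc add dev lo clsact
 *	# tc filter add dev lo ingress bpf direct-action \
 *		object-file test_sk_assign.o section classifier/sk_assign_test
 */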