// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
3 
4 #include <stddef.h>
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <linux/ipv6.h>
10 #include <linux/tcp.h>
11 #include <linux/socket.h>
12 #include <linux/bpf.h>
13 #include <linux/types.h>
14 #include <bpf/bpf_helpers.h>
15 #include <bpf/bpf_endian.h>
16 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
17 #include "test_tcp_hdr_options.h"
18 
19 __u16 last_addr16_n = __bpf_htons(1);
20 __u16 active_lport_n = 0;
21 __u16 active_lport_h = 0;
22 __u16 passive_lport_n = 0;
23 __u16 passive_lport_h = 0;
24 
25 /* options received at passive side */
26 unsigned int nr_pure_ack = 0;
27 unsigned int nr_data = 0;
28 unsigned int nr_syn = 0;
29 unsigned int nr_fin = 0;
30 unsigned int nr_hwtstamp = 0;
31 
32 /* Check the header received from the active side */
__check_active_hdr_in(struct bpf_sock_ops * skops,bool check_syn)33 static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
34 {
35 	union {
36 		struct tcphdr th;
37 		struct ipv6hdr ip6;
38 		struct tcp_exprm_opt exprm_opt;
39 		struct tcp_opt reg_opt;
40 		__u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
41 	} hdr = {};
42 	__u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
43 	struct tcphdr *pth;
44 	int ret;
45 
46 	hdr.reg_opt.kind = 0xB9;
47 
48 	/* The option is 4 bytes long instead of 2 bytes */
49 	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
50 	if (ret != -ENOSPC)
51 		RET_CG_ERR(ret);
52 
53 	/* Test searching magic with regular kind */
54 	hdr.reg_opt.len = 4;
55 	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
56 			       load_flags);
57 	if (ret != -EINVAL)
58 		RET_CG_ERR(ret);
59 
60 	hdr.reg_opt.len = 0;
61 	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
62 			       load_flags);
63 	if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
64 	    hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
65 		RET_CG_ERR(ret);
66 
67 	/* Test searching experimental option with invalid kind length */
68 	hdr.exprm_opt.kind = TCPOPT_EXP;
69 	hdr.exprm_opt.len = 5;
70 	hdr.exprm_opt.magic = 0;
71 	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
72 			       load_flags);
73 	if (ret != -EINVAL)
74 		RET_CG_ERR(ret);
75 
76 	/* Test searching experimental option with 0 magic value */
77 	hdr.exprm_opt.len = 4;
78 	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
79 			       load_flags);
80 	if (ret != -ENOMSG)
81 		RET_CG_ERR(ret);
82 
83 	hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
84 	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
85 			       load_flags);
86 	if (ret != 4 || hdr.exprm_opt.len != 4 ||
87 	    hdr.exprm_opt.kind != TCPOPT_EXP ||
88 	    hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
89 		RET_CG_ERR(ret);
90 
91 	if (!check_syn)
92 		return CG_OK;
93 
94 	/* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
95 	 *
96 	 * Test loading from tp->saved_syn for other sk_state.
97 	 */
98 	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
99 			     sizeof(hdr.ip6));
100 	if (ret != -ENOSPC)
101 		RET_CG_ERR(ret);
102 
103 	if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
104 	    hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
105 		RET_CG_ERR(0);
106 
107 	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
108 	if (ret < 0)
109 		RET_CG_ERR(ret);
110 
111 	pth = (struct tcphdr *)(&hdr.ip6 + 1);
112 	if (pth->dest != passive_lport_n || pth->source != active_lport_n)
113 		RET_CG_ERR(0);
114 
115 	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
116 	if (ret < 0)
117 		RET_CG_ERR(ret);
118 
119 	if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
120 		RET_CG_ERR(0);
121 
122 	return CG_OK;
123 }
124 
/* Validate the SYN sent by the active side: runs the shared option checks
 * with check_syn == true, which also enables the saved-SYN header checks.
 *
 * Returns whatever __check_active_hdr_in() returns.
 */
static int check_active_syn_in(struct bpf_sock_ops *skops)
{
	return __check_active_hdr_in(skops, true);
}
129 
check_active_hdr_in(struct bpf_sock_ops * skops)130 static int check_active_hdr_in(struct bpf_sock_ops *skops)
131 {
132 	struct tcphdr *th;
133 
134 	if (__check_active_hdr_in(skops, false) == CG_ERR)
135 		return CG_ERR;
136 
137 	th = skops->skb_data;
138 	if (th + 1 > skops->skb_data_end)
139 		RET_CG_ERR(0);
140 
141 	if (tcp_hdrlen(th) < skops->skb_len)
142 		nr_data++;
143 
144 	if (th->fin)
145 		nr_fin++;
146 
147 	if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
148 		nr_pure_ack++;
149 
150 	if (skops->skb_hwtstamp)
151 		nr_hwtstamp++;
152 
153 	return CG_OK;
154 }
155 
active_opt_len(struct bpf_sock_ops * skops)156 static int active_opt_len(struct bpf_sock_ops *skops)
157 {
158 	int err;
159 
160 	/* Reserve more than enough to allow the -EEXIST test in
161 	 * the write_active_opt().
162 	 */
163 	err = bpf_reserve_hdr_opt(skops, 12, 0);
164 	if (err)
165 		RET_CG_ERR(err);
166 
167 	return CG_OK;
168 }
169 
write_active_opt(struct bpf_sock_ops * skops)170 static int write_active_opt(struct bpf_sock_ops *skops)
171 {
172 	struct tcp_exprm_opt exprm_opt = {};
173 	struct tcp_opt win_scale_opt = {};
174 	struct tcp_opt reg_opt = {};
175 	struct tcphdr *th;
176 	int err, ret;
177 
178 	exprm_opt.kind = TCPOPT_EXP;
179 	exprm_opt.len = 4;
180 	exprm_opt.magic = __bpf_htons(0xeB9F);
181 
182 	reg_opt.kind = 0xB9;
183 	reg_opt.len = 4;
184 	reg_opt.data[0] = 0xfa;
185 	reg_opt.data[1] = 0xce;
186 
187 	win_scale_opt.kind = TCPOPT_WINDOW;
188 
189 	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
190 	if (err)
191 		RET_CG_ERR(err);
192 
193 	/* Store the same exprm option */
194 	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
195 	if (err != -EEXIST)
196 		RET_CG_ERR(err);
197 
198 	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
199 	if (err)
200 		RET_CG_ERR(err);
201 	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
202 	if (err != -EEXIST)
203 		RET_CG_ERR(err);
204 
205 	/* Check the option has been written and can be searched */
206 	ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
207 	if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
208 	    exprm_opt.magic != __bpf_htons(0xeB9F))
209 		RET_CG_ERR(ret);
210 
211 	reg_opt.len = 0;
212 	ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
213 	if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
214 	    reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
215 		RET_CG_ERR(ret);
216 
217 	th = skops->skb_data;
218 	if (th + 1 > skops->skb_data_end)
219 		RET_CG_ERR(0);
220 
221 	if (th->syn) {
222 		active_lport_h = skops->local_port;
223 		active_lport_n = th->source;
224 
225 		/* Search the win scale option written by kernel
226 		 * in the SYN packet.
227 		 */
228 		ret = bpf_load_hdr_opt(skops, &win_scale_opt,
229 				       sizeof(win_scale_opt), 0);
230 		if (ret != 3 || win_scale_opt.len != 3 ||
231 		    win_scale_opt.kind != TCPOPT_WINDOW)
232 			RET_CG_ERR(ret);
233 
234 		/* Write the win scale option that kernel
235 		 * has already written.
236 		 */
237 		err = bpf_store_hdr_opt(skops, &win_scale_opt,
238 					sizeof(win_scale_opt), 0);
239 		if (err != -EEXIST)
240 			RET_CG_ERR(err);
241 	}
242 
243 	return CG_OK;
244 }
245 
handle_hdr_opt_len(struct bpf_sock_ops * skops)246 static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
247 {
248 	__u8 tcp_flags = skops_tcp_flags(skops);
249 
250 	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
251 		/* Check the SYN from bpf_sock_ops_kern->syn_skb */
252 		return check_active_syn_in(skops);
253 
254 	/* Passive side should have cleared the write hdr cb by now */
255 	if (skops->local_port == passive_lport_h)
256 		RET_CG_ERR(0);
257 
258 	return active_opt_len(skops);
259 }
260 
handle_write_hdr_opt(struct bpf_sock_ops * skops)261 static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
262 {
263 	if (skops->local_port == passive_lport_h)
264 		RET_CG_ERR(0);
265 
266 	return write_active_opt(skops);
267 }
268 
handle_parse_hdr(struct bpf_sock_ops * skops)269 static int handle_parse_hdr(struct bpf_sock_ops *skops)
270 {
271 	/* Passive side is not writing any non-standard/unknown
272 	 * option, so the active side should never be called.
273 	 */
274 	if (skops->local_port == active_lport_h)
275 		RET_CG_ERR(0);
276 
277 	return check_active_hdr_in(skops);
278 }
279 
handle_passive_estab(struct bpf_sock_ops * skops)280 static int handle_passive_estab(struct bpf_sock_ops *skops)
281 {
282 	int err;
283 
284 	/* No more write hdr cb */
285 	bpf_sock_ops_cb_flags_set(skops,
286 				  skops->bpf_sock_ops_cb_flags &
287 				  ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
288 
289 	/* Recheck the SYN but check the tp->saved_syn this time */
290 	err = check_active_syn_in(skops);
291 	if (err == CG_ERR)
292 		return err;
293 
294 	nr_syn++;
295 
296 	/* The ack has header option written by the active side also */
297 	return check_active_hdr_in(skops);
298 }
299 
300 SEC("sockops")
misc_estab(struct bpf_sock_ops * skops)301 int misc_estab(struct bpf_sock_ops *skops)
302 {
303 	int true_val = 1;
304 
305 	switch (skops->op) {
306 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
307 		passive_lport_h = skops->local_port;
308 		passive_lport_n = __bpf_htons(passive_lport_h);
309 		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
310 			       &true_val, sizeof(true_val));
311 		set_hdr_cb_flags(skops, 0);
312 		break;
313 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
314 		set_hdr_cb_flags(skops, 0);
315 		break;
316 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
317 		return handle_parse_hdr(skops);
318 	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
319 		return handle_hdr_opt_len(skops);
320 	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
321 		return handle_write_hdr_opt(skops);
322 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
323 		return handle_passive_estab(skops);
324 	}
325 
326 	return CG_OK;
327 }
328 
329 char _license[] SEC("license") = "GPL";
330