1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 
4 #include <stddef.h>
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <linux/tcp.h>
10 #include <linux/socket.h>
11 #include <linux/bpf.h>
12 #include <linux/types.h>
13 #include <bpf/bpf_helpers.h>
14 #include <bpf/bpf_endian.h>
15 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
16 #include "test_tcp_hdr_options.h"
17 
/* Fallback for sizeof_field(): yields the size in bytes of MEMBER inside
 * TYPE without needing an object of that type.  Only defined when an
 * included header has not already provided it.
 */
#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif
21 
/* TCP option kind used when writing and searching for the test option.
 * The experimental kind is the default; any other value selects the
 * "regular" option layout in store_option()/load_option().
 */
__u8 test_kind = TCPOPT_EXP;
/* Magic carried by the experimental option; converted with
 * __bpf_htons() before it is written or searched for.
 */
__u16 test_magic = 0xeB9F;

/* NOTE(review): the *_out objects below are only read by this program and
 * the *_in objects are only written by it; presumably user space sets the
 * former and inspects the latter - confirm against the test harness.
 */

/* Options the passive side writes into its SYNACK and its FIN */
struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out	= {};

/* Options the passive side parsed at established time and from a FIN */
struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in	= {};

/* Options the active side writes into its SYN and its FIN */
struct bpf_test_option active_syn_out	= {};
struct bpf_test_option active_fin_out	= {};

/* Options the active side parsed at established time and from a FIN */
struct bpf_test_option active_estab_in	= {};
struct bpf_test_option active_fin_in	= {};
36 
/* Socket-local storage holding one struct hdr_stg per socket.  An entry
 * is created in the established callbacks and looked up again by the
 * header option length/write/parse callbacks.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");
43 
44 static bool skops_want_cookie(const struct bpf_sock_ops *skops)
45 {
46 	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
47 }
48 
49 static bool skops_current_mss(const struct bpf_sock_ops *skops)
50 {
51 	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
52 }
53 
54 static __u8 option_total_len(__u8 flags)
55 {
56 	__u8 i, len = 1; /* +1 for flags */
57 
58 	if (!flags)
59 		return 0;
60 
61 	/* RESEND bit does not use a byte */
62 	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
63 		len += !!TEST_OPTION_FLAGS(flags, i);
64 
65 	if (test_kind == TCPOPT_EXP)
66 		return len + TCP_BPF_EXPOPT_BASE_LEN;
67 	else
68 		return len + 2; /* +1 kind, +1 kind-len */
69 }
70 
71 static void write_test_option(const struct bpf_test_option *test_opt,
72 			      __u8 *data)
73 {
74 	__u8 offset = 0;
75 
76 	data[offset++] = test_opt->flags;
77 	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
78 		data[offset++] = test_opt->max_delack_ms;
79 
80 	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
81 		data[offset++] = test_opt->rand;
82 }
83 
84 static int store_option(struct bpf_sock_ops *skops,
85 			const struct bpf_test_option *test_opt)
86 {
87 	union {
88 		struct tcp_exprm_opt exprm;
89 		struct tcp_opt regular;
90 	} write_opt;
91 	int err;
92 
93 	if (test_kind == TCPOPT_EXP) {
94 		write_opt.exprm.kind = TCPOPT_EXP;
95 		write_opt.exprm.len = option_total_len(test_opt->flags);
96 		write_opt.exprm.magic = __bpf_htons(test_magic);
97 		write_opt.exprm.data32 = 0;
98 		write_test_option(test_opt, write_opt.exprm.data);
99 		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
100 					sizeof(write_opt.exprm), 0);
101 	} else {
102 		write_opt.regular.kind = test_kind;
103 		write_opt.regular.len = option_total_len(test_opt->flags);
104 		write_opt.regular.data32 = 0;
105 		write_test_option(test_opt, write_opt.regular.data);
106 		err = bpf_store_hdr_opt(skops, &write_opt.regular,
107 					sizeof(write_opt.regular), 0);
108 	}
109 
110 	if (err)
111 		RET_CG_ERR(err);
112 
113 	return CG_OK;
114 }
115 
116 static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
117 {
118 	opt->flags = *start++;
119 
120 	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
121 		opt->max_delack_ms = *start++;
122 
123 	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
124 		opt->rand = *start++;
125 
126 	return 0;
127 }
128 
129 static int load_option(struct bpf_sock_ops *skops,
130 		       struct bpf_test_option *test_opt, bool from_syn)
131 {
132 	union {
133 		struct tcp_exprm_opt exprm;
134 		struct tcp_opt regular;
135 	} search_opt;
136 	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
137 
138 	if (test_kind == TCPOPT_EXP) {
139 		search_opt.exprm.kind = TCPOPT_EXP;
140 		search_opt.exprm.len = 4;
141 		search_opt.exprm.magic = __bpf_htons(test_magic);
142 		search_opt.exprm.data32 = 0;
143 		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
144 				       sizeof(search_opt.exprm), load_flags);
145 		if (ret < 0)
146 			return ret;
147 		return parse_test_option(test_opt, search_opt.exprm.data);
148 	} else {
149 		search_opt.regular.kind = test_kind;
150 		search_opt.regular.len = 0;
151 		search_opt.regular.data32 = 0;
152 		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
153 				       sizeof(search_opt.regular), load_flags);
154 		if (ret < 0)
155 			return ret;
156 		return parse_test_option(test_opt, search_opt.regular.data);
157 	}
158 }
159 
160 static int synack_opt_len(struct bpf_sock_ops *skops)
161 {
162 	struct bpf_test_option test_opt = {};
163 	__u8 optlen;
164 	int err;
165 
166 	if (!passive_synack_out.flags)
167 		return CG_OK;
168 
169 	err = load_option(skops, &test_opt, true);
170 
171 	/* bpf_test_option is not found */
172 	if (err == -ENOMSG)
173 		return CG_OK;
174 
175 	if (err)
176 		RET_CG_ERR(err);
177 
178 	optlen = option_total_len(passive_synack_out.flags);
179 	if (optlen) {
180 		err = bpf_reserve_hdr_opt(skops, optlen, 0);
181 		if (err)
182 			RET_CG_ERR(err);
183 	}
184 
185 	return CG_OK;
186 }
187 
188 static int write_synack_opt(struct bpf_sock_ops *skops)
189 {
190 	struct bpf_test_option opt;
191 
192 	if (!passive_synack_out.flags)
193 		/* We should not even be called since no header
194 		 * space has been reserved.
195 		 */
196 		RET_CG_ERR(0);
197 
198 	opt = passive_synack_out;
199 	if (skops_want_cookie(skops))
200 		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
201 
202 	return store_option(skops, &opt);
203 }
204 
205 static int syn_opt_len(struct bpf_sock_ops *skops)
206 {
207 	__u8 optlen;
208 	int err;
209 
210 	if (!active_syn_out.flags)
211 		return CG_OK;
212 
213 	optlen = option_total_len(active_syn_out.flags);
214 	if (optlen) {
215 		err = bpf_reserve_hdr_opt(skops, optlen, 0);
216 		if (err)
217 			RET_CG_ERR(err);
218 	}
219 
220 	return CG_OK;
221 }
222 
223 static int write_syn_opt(struct bpf_sock_ops *skops)
224 {
225 	if (!active_syn_out.flags)
226 		RET_CG_ERR(0);
227 
228 	return store_option(skops, &active_syn_out);
229 }
230 
231 static int fin_opt_len(struct bpf_sock_ops *skops)
232 {
233 	struct bpf_test_option *opt;
234 	struct hdr_stg *hdr_stg;
235 	__u8 optlen;
236 	int err;
237 
238 	if (!skops->sk)
239 		RET_CG_ERR(0);
240 
241 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
242 	if (!hdr_stg)
243 		RET_CG_ERR(0);
244 
245 	if (hdr_stg->active)
246 		opt = &active_fin_out;
247 	else
248 		opt = &passive_fin_out;
249 
250 	optlen = option_total_len(opt->flags);
251 	if (optlen) {
252 		err = bpf_reserve_hdr_opt(skops, optlen, 0);
253 		if (err)
254 			RET_CG_ERR(err);
255 	}
256 
257 	return CG_OK;
258 }
259 
260 static int write_fin_opt(struct bpf_sock_ops *skops)
261 {
262 	struct bpf_test_option *opt;
263 	struct hdr_stg *hdr_stg;
264 
265 	if (!skops->sk)
266 		RET_CG_ERR(0);
267 
268 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
269 	if (!hdr_stg)
270 		RET_CG_ERR(0);
271 
272 	if (hdr_stg->active)
273 		opt = &active_fin_out;
274 	else
275 		opt = &passive_fin_out;
276 
277 	if (!opt->flags)
278 		RET_CG_ERR(0);
279 
280 	return store_option(skops, opt);
281 }
282 
283 static int resend_in_ack(struct bpf_sock_ops *skops)
284 {
285 	struct hdr_stg *hdr_stg;
286 
287 	if (!skops->sk)
288 		return -1;
289 
290 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
291 	if (!hdr_stg)
292 		return -1;
293 
294 	return !!hdr_stg->resend_syn;
295 }
296 
297 static int nodata_opt_len(struct bpf_sock_ops *skops)
298 {
299 	int resend;
300 
301 	resend = resend_in_ack(skops);
302 	if (resend < 0)
303 		RET_CG_ERR(0);
304 
305 	if (resend)
306 		return syn_opt_len(skops);
307 
308 	return CG_OK;
309 }
310 
311 static int write_nodata_opt(struct bpf_sock_ops *skops)
312 {
313 	int resend;
314 
315 	resend = resend_in_ack(skops);
316 	if (resend < 0)
317 		RET_CG_ERR(0);
318 
319 	if (resend)
320 		return write_syn_opt(skops);
321 
322 	return CG_OK;
323 }
324 
/* Header-length callback for a packet carrying data. */
static int data_opt_len(struct bpf_sock_ops *skops)
{
	int ret;

	/* Identical to the no-data case.  It is kept separate
	 * mostly so the caller can demonstrate a use of
	 * skops->skb_len.
	 */
	ret = nodata_opt_len(skops);

	return ret;
}
332 
/* Write-header callback for a packet carrying data; it does exactly
 * what the no-data variant does.
 */
static int write_data_opt(struct bpf_sock_ops *skops)
{
	int ret;

	ret = write_nodata_opt(skops);

	return ret;
}
337 
338 static int current_mss_opt_len(struct bpf_sock_ops *skops)
339 {
340 	/* Reserve maximum that may be needed */
341 	int err;
342 
343 	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
344 	if (err)
345 		RET_CG_ERR(err);
346 
347 	return CG_OK;
348 }
349 
350 static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
351 {
352 	__u8 tcp_flags = skops_tcp_flags(skops);
353 
354 	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
355 		return synack_opt_len(skops);
356 
357 	if (tcp_flags & TCPHDR_SYN)
358 		return syn_opt_len(skops);
359 
360 	if (tcp_flags & TCPHDR_FIN)
361 		return fin_opt_len(skops);
362 
363 	if (skops_current_mss(skops))
364 		/* The kernel is calculating the MSS */
365 		return current_mss_opt_len(skops);
366 
367 	if (skops->skb_len)
368 		return data_opt_len(skops);
369 
370 	return nodata_opt_len(skops);
371 }
372 
373 static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
374 {
375 	__u8 tcp_flags = skops_tcp_flags(skops);
376 	struct tcphdr *th;
377 
378 	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
379 		return write_synack_opt(skops);
380 
381 	if (tcp_flags & TCPHDR_SYN)
382 		return write_syn_opt(skops);
383 
384 	if (tcp_flags & TCPHDR_FIN)
385 		return write_fin_opt(skops);
386 
387 	th = skops->skb_data;
388 	if (th + 1 > skops->skb_data_end)
389 		RET_CG_ERR(0);
390 
391 	if (skops->skb_len > tcp_hdrlen(th))
392 		return write_data_opt(skops);
393 
394 	return write_nodata_opt(skops);
395 }
396 
397 static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
398 {
399 	__u32 max_delack_us = max_delack_ms * 1000;
400 
401 	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
402 			      &max_delack_us, sizeof(max_delack_us));
403 }
404 
405 static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
406 {
407 	__u32 min_rto_us = peer_max_delack_ms * 1000;
408 
409 	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
410 			      sizeof(min_rto_us));
411 }
412 
413 static int handle_active_estab(struct bpf_sock_ops *skops)
414 {
415 	struct hdr_stg init_stg = {
416 		.active = true,
417 	};
418 	int err;
419 
420 	err = load_option(skops, &active_estab_in, false);
421 	if (err && err != -ENOMSG)
422 		RET_CG_ERR(err);
423 
424 	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
425 						OPTION_RESEND);
426 	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
427 					      &init_stg,
428 					      BPF_SK_STORAGE_GET_F_CREATE))
429 		RET_CG_ERR(0);
430 
431 	if (init_stg.resend_syn)
432 		/* Don't clear the write_hdr cb now because
433 		 * the ACK may get lost and retransmit may
434 		 * be needed.
435 		 *
436 		 * PARSE_ALL_HDR cb flag is set to learn if this
437 		 * resend_syn option has received by the peer.
438 		 *
439 		 * The header option will be resent until a valid
440 		 * packet is received at handle_parse_hdr()
441 		 * and all hdr cb flags will be cleared in
442 		 * handle_parse_hdr().
443 		 */
444 		set_parse_all_hdr_cb_flags(skops);
445 	else if (!active_fin_out.flags)
446 		/* No options will be written from now */
447 		clear_hdr_cb_flags(skops);
448 
449 	if (active_syn_out.max_delack_ms) {
450 		err = set_delack_max(skops, active_syn_out.max_delack_ms);
451 		if (err)
452 			RET_CG_ERR(err);
453 	}
454 
455 	if (active_estab_in.max_delack_ms) {
456 		err = set_rto_min(skops, active_estab_in.max_delack_ms);
457 		if (err)
458 			RET_CG_ERR(err);
459 	}
460 
461 	return CG_OK;
462 }
463 
464 static int handle_passive_estab(struct bpf_sock_ops *skops)
465 {
466 	struct hdr_stg init_stg = {};
467 	struct tcphdr *th;
468 	int err;
469 
470 	err = load_option(skops, &passive_estab_in, true);
471 	if (err == -ENOENT) {
472 		/* saved_syn is not found. It was in syncookie mode.
473 		 * We have asked the active side to resend the options
474 		 * in ACK, so try to find the bpf_test_option from ACK now.
475 		 */
476 		err = load_option(skops, &passive_estab_in, false);
477 		init_stg.syncookie = true;
478 	}
479 
480 	/* ENOMSG: The bpf_test_option is not found which is fine.
481 	 * Bail out now for all other errors.
482 	 */
483 	if (err && err != -ENOMSG)
484 		RET_CG_ERR(err);
485 
486 	th = skops->skb_data;
487 	if (th + 1 > skops->skb_data_end)
488 		RET_CG_ERR(0);
489 
490 	if (th->syn) {
491 		/* Fastopen */
492 
493 		/* Cannot clear cb_flags to stop write_hdr cb.
494 		 * synack is not sent yet for fast open.
495 		 * Even it was, the synack may need to be retransmitted.
496 		 *
497 		 * PARSE_ALL_HDR cb flag is set to learn
498 		 * if synack has reached the peer.
499 		 * All cb_flags will be cleared in handle_parse_hdr().
500 		 */
501 		set_parse_all_hdr_cb_flags(skops);
502 		init_stg.fastopen = true;
503 	} else if (!passive_fin_out.flags) {
504 		/* No options will be written from now */
505 		clear_hdr_cb_flags(skops);
506 	}
507 
508 	if (!skops->sk ||
509 	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
510 				BPF_SK_STORAGE_GET_F_CREATE))
511 		RET_CG_ERR(0);
512 
513 	if (passive_synack_out.max_delack_ms) {
514 		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
515 		if (err)
516 			RET_CG_ERR(err);
517 	}
518 
519 	if (passive_estab_in.max_delack_ms) {
520 		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
521 		if (err)
522 			RET_CG_ERR(err);
523 	}
524 
525 	return CG_OK;
526 }
527 
528 static int handle_parse_hdr(struct bpf_sock_ops *skops)
529 {
530 	struct hdr_stg *hdr_stg;
531 	struct tcphdr *th;
532 
533 	if (!skops->sk)
534 		RET_CG_ERR(0);
535 
536 	th = skops->skb_data;
537 	if (th + 1 > skops->skb_data_end)
538 		RET_CG_ERR(0);
539 
540 	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
541 	if (!hdr_stg)
542 		RET_CG_ERR(0);
543 
544 	if (hdr_stg->resend_syn || hdr_stg->fastopen)
545 		/* The PARSE_ALL_HDR cb flag was turned on
546 		 * to ensure that the previously written
547 		 * options have reached the peer.
548 		 * Those previously written option includes:
549 		 *     - Active side: resend_syn in ACK during syncookie
550 		 *      or
551 		 *     - Passive side: SYNACK during fastopen
552 		 *
553 		 * A valid packet has been received here after
554 		 * the 3WHS, so the PARSE_ALL_HDR cb flag
555 		 * can be cleared now.
556 		 */
557 		clear_parse_all_hdr_cb_flags(skops);
558 
559 	if (hdr_stg->resend_syn && !active_fin_out.flags)
560 		/* Active side resent the syn option in ACK
561 		 * because the server was in syncookie mode.
562 		 * A valid packet has been received, so
563 		 * clear header cb flags if there is no
564 		 * more option to send.
565 		 */
566 		clear_hdr_cb_flags(skops);
567 
568 	if (hdr_stg->fastopen && !passive_fin_out.flags)
569 		/* Passive side was in fastopen.
570 		 * A valid packet has been received, so
571 		 * the SYNACK has reached the peer.
572 		 * Clear header cb flags if there is no more
573 		 * option to send.
574 		 */
575 		clear_hdr_cb_flags(skops);
576 
577 	if (th->fin) {
578 		struct bpf_test_option *fin_opt;
579 		int err;
580 
581 		if (hdr_stg->active)
582 			fin_opt = &active_fin_in;
583 		else
584 			fin_opt = &passive_fin_in;
585 
586 		err = load_option(skops, fin_opt, false);
587 		if (err && err != -ENOMSG)
588 			RET_CG_ERR(err);
589 	}
590 
591 	return CG_OK;
592 }
593 
594 SEC("sockops/estab")
595 int estab(struct bpf_sock_ops *skops)
596 {
597 	int true_val = 1;
598 
599 	switch (skops->op) {
600 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
601 		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
602 			       &true_val, sizeof(true_val));
603 		set_hdr_cb_flags(skops);
604 		break;
605 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
606 		set_hdr_cb_flags(skops);
607 		break;
608 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
609 		return handle_parse_hdr(skops);
610 	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
611 		return handle_hdr_opt_len(skops);
612 	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
613 		return handle_write_hdr_opt(skops);
614 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
615 		return handle_passive_estab(skops);
616 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
617 		return handle_active_estab(skops);
618 	}
619 
620 	return CG_OK;
621 }
622 
/* License string placed in the "license" ELF section.
 * NOTE(review): presumably needed so the kernel allows GPL-only
 * helpers for this program - confirm.
 */
char _license[] SEC("license") = "GPL";
624