// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */

/* BPF sockops program exercising the TCP header-option helpers:
 * bpf_reserve_hdr_opt() (in BPF_SOCK_OPS_HDR_OPT_LEN_CB),
 * bpf_store_hdr_opt() (in BPF_SOCK_OPS_WRITE_HDR_OPT_CB) and
 * bpf_load_hdr_opt() (on established/parse callbacks).
 *
 * A small test option (flags byte + optional max_delack_ms/rand bytes)
 * is exchanged on SYN/SYNACK/FIN/data packets.  The *_out globals are
 * what this side writes; the *_in globals record what was received, so
 * the userspace side of the test can inspect them.  Option layout
 * helpers (struct bpf_test_option, TEST_OPTION_FLAGS, RET_CG_ERR, ...)
 * come from test_tcp_hdr_options.h.
 */

#include <stddef.h>
#include <errno.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include <linux/socket.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"

#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif

/* Option "kind" to use on the wire.  Userspace may override these
 * before load: TCPOPT_EXP uses the experimental-option format with a
 * 16-bit magic; any other kind uses the plain kind/len format.
 */
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;

/* Options this (passive) side writes on SYNACK/FIN. */
struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out = {};

/* Options the passive side received (filled in for userspace to check). */
struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in = {};

/* Options this (active) side writes on SYN/FIN. */
struct bpf_test_option active_syn_out = {};
struct bpf_test_option active_fin_out = {};

/* Options the active side received. */
struct bpf_test_option active_estab_in = {};
struct bpf_test_option active_fin_in = {};

/* Per-socket state: whether this sk is the active side, and whether
 * resend_syn/fastopen/syncookie handling is in progress.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");

/* True when the WRITE_HDR_OPT callback is for a syncookie SYNACK. */
static bool skops_want_cookie(const struct bpf_sock_ops *skops)
{
	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
}

/* True when the HDR_OPT_LEN callback is for the kernel's MSS calculation. */
static bool skops_current_mss(const struct bpf_sock_ops *skops)
{
	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
}

/* On-the-wire length of the test option for a given flags byte:
 * one byte per flag that carries data (RESEND is flag-only), plus the
 * kind/len (and magic, for TCPOPT_EXP) header.  0 means "no option".
 */
static __u8 option_total_len(__u8 flags)
{
	__u8 i, len = 1; /* +1 for flags */

	if (!flags)
		return 0;

	/* RESEND bit does not use a byte */
	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
		len += !!TEST_OPTION_FLAGS(flags, i);

	if (test_kind == TCPOPT_EXP)
		return len + TCP_BPF_EXPOPT_BASE_LEN;
	else
		return len + 2; /* +1 kind, +1 kind-len */
}

/* Serialize the option payload (flags byte, then the optional
 * max_delack_ms and rand bytes in that order) into @data.
 */
static void write_test_option(const struct bpf_test_option *test_opt,
			      __u8 *data)
{
	__u8 offset = 0;

	data[offset++] = test_opt->flags;
	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
		data[offset++] = test_opt->max_delack_ms;

	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
		data[offset++] = test_opt->rand;
}

/* Write @test_opt into the outgoing packet's header with
 * bpf_store_hdr_opt(), in either experimental (kind/len/magic) or
 * regular (kind/len) format depending on test_kind.
 * Returns CG_OK or aborts via RET_CG_ERR().
 */
static int store_option(struct bpf_sock_ops *skops,
			const struct bpf_test_option *test_opt)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} write_opt;
	int err;

	if (test_kind == TCPOPT_EXP) {
		write_opt.exprm.kind = TCPOPT_EXP;
		write_opt.exprm.len = option_total_len(test_opt->flags);
		write_opt.exprm.magic = __bpf_htons(test_magic);
		write_opt.exprm.data32 = 0;
		write_test_option(test_opt, write_opt.exprm.data);
		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
					sizeof(write_opt.exprm), 0);
	} else {
		write_opt.regular.kind = test_kind;
		write_opt.regular.len = option_total_len(test_opt->flags);
		write_opt.regular.data32 = 0;
		write_test_option(test_opt, write_opt.regular.data);
		err = bpf_store_hdr_opt(skops, &write_opt.regular,
					sizeof(write_opt.regular), 0);
	}

	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}

/* Inverse of write_test_option(): decode flags and the optional bytes
 * from @start.  Caller guarantees @start points at the option payload.
 */
static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
{
	opt->flags = *start++;

	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
		opt->max_delack_ms = *start++;

	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
		opt->rand = *start++;

	return 0;
}

/* Search the current (or saved SYN, if @from_syn) header for the test
 * option with bpf_load_hdr_opt() and decode it into @test_opt.
 * For TCPOPT_EXP, len=4 + magic selects the option; for a regular kind,
 * len=0 means "any length".  Returns 0 or a negative helper error
 * (-ENOMSG: option not present; -ENOENT: no saved SYN).
 */
static int load_option(struct bpf_sock_ops *skops,
		       struct bpf_test_option *test_opt, bool from_syn)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} search_opt;
	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;

	if (test_kind == TCPOPT_EXP) {
		search_opt.exprm.kind = TCPOPT_EXP;
		search_opt.exprm.len = 4;
		search_opt.exprm.magic = __bpf_htons(test_magic);
		search_opt.exprm.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
				       sizeof(search_opt.exprm), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.exprm.data);
	} else {
		search_opt.regular.kind = test_kind;
		search_opt.regular.len = 0;
		search_opt.regular.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
				       sizeof(search_opt.regular), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.regular.data);
	}
}

/* HDR_OPT_LEN for a SYNACK: reserve space only if we have something to
 * send AND the peer's SYN (read from the saved syn) carried the option.
 */
static int synack_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option test_opt = {};
	__u8 optlen;
	int err;

	if (!passive_synack_out.flags)
		return CG_OK;

	err = load_option(skops, &test_opt, true);

	/* bpf_test_option is not found */
	if (err == -ENOMSG)
		return CG_OK;

	if (err)
		RET_CG_ERR(err);

	optlen = option_total_len(passive_synack_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* WRITE_HDR_OPT for a SYNACK.  On a syncookie SYNACK, also set the
 * RESEND flag to ask the active side to resend its option in the ACK
 * (the listener keeps no state in syncookie mode).
 */
static int write_synack_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option opt;

	if (!passive_synack_out.flags)
		/* We should not even be called since no header
		 * space has been reserved.
		 */
		RET_CG_ERR(0);

	opt = passive_synack_out;
	if (skops_want_cookie(skops))
		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);

	return store_option(skops, &opt);
}

/* HDR_OPT_LEN for the active side's SYN. */
static int syn_opt_len(struct bpf_sock_ops *skops)
{
	__u8 optlen;
	int err;

	if (!active_syn_out.flags)
		return CG_OK;

	optlen = option_total_len(active_syn_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* WRITE_HDR_OPT for the active side's SYN (also reused to resend the
 * option in the ACK after a syncookie handshake, see write_nodata_opt()).
 */
static int write_syn_opt(struct bpf_sock_ops *skops)
{
	if (!active_syn_out.flags)
		RET_CG_ERR(0);

	return store_option(skops, &active_syn_out);
}

/* HDR_OPT_LEN for a FIN: pick the active/passive FIN option based on
 * the role recorded in sk storage.
 */
static int fin_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;
	__u8 optlen;
	int err;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	optlen = option_total_len(opt->flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* WRITE_HDR_OPT for a FIN; counterpart of fin_opt_len(). */
static int write_fin_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	if (!opt->flags)
		RET_CG_ERR(0);

	return store_option(skops, opt);
}

/* 1 if this sk still needs to resend its option in ACKs (syncookie
 * recovery), 0 if not, -1 on missing sk/storage.
 */
static int resend_in_ack(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		return -1;

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		return -1;

	return !!hdr_stg->resend_syn;
}

/* HDR_OPT_LEN for a pure ACK: only reserve space when resending the
 * SYN option after a syncookie handshake.
 */
static int nodata_opt_len(struct bpf_sock_ops *skops)
{
	int resend;

	resend = resend_in_ack(skops);
	if (resend < 0)
		RET_CG_ERR(0);

	if (resend)
		return syn_opt_len(skops);

	return CG_OK;
}

/* WRITE_HDR_OPT counterpart of nodata_opt_len(). */
static int write_nodata_opt(struct bpf_sock_ops *skops)
{
	int resend;

	resend = resend_in_ack(skops);
	if (resend < 0)
		RET_CG_ERR(0);

	if (resend)
		return write_syn_opt(skops);

	return CG_OK;
}

static int data_opt_len(struct bpf_sock_ops *skops)
{
	/* Same as the nodata version. Mostly to show
	 * an example usage on skops->skb_len.
	 */
	return nodata_opt_len(skops);
}

static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}

/* HDR_OPT_LEN while the kernel computes the MSS: reserve the worst-case
 * option length so the MSS accounts for it.
 */
static int current_mss_opt_len(struct bpf_sock_ops *skops)
{
	/* Reserve maximum that may be needed */
	int err;

	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}

/* Dispatch BPF_SOCK_OPS_HDR_OPT_LEN_CB by outgoing packet type.
 * SYNACK must be tested before SYN since it carries both flags.
 */
static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return synack_opt_len(skops);

	if (tcp_flags & TCPHDR_SYN)
		return syn_opt_len(skops);

	if (tcp_flags & TCPHDR_FIN)
		return fin_opt_len(skops);

	if (skops_current_mss(skops))
		/* The kernel is calculating the MSS */
		return current_mss_opt_len(skops);

	if (skops->skb_len)
		return data_opt_len(skops);

	return nodata_opt_len(skops);
}

/* Dispatch BPF_SOCK_OPS_WRITE_HDR_OPT_CB; mirrors handle_hdr_opt_len(). */
static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);
	struct tcphdr *th;

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return write_synack_opt(skops);

	if (tcp_flags & TCPHDR_SYN)
		return write_syn_opt(skops);

	if (tcp_flags & TCPHDR_FIN)
		return write_fin_opt(skops);

	/* Verifier-required bounds check before reading the TCP header. */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (skops->skb_len > tcp_hdrlen(th))
		return write_data_opt(skops);

	return write_nodata_opt(skops);
}

/* Cap our delayed-ACK timeout from the option we advertised (ms -> us). */
static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 max_delack_us = max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &max_delack_us, sizeof(max_delack_us));
}

/* Raise our RTO floor to the peer's advertised max delayed-ACK (ms -> us),
 * so we do not retransmit while the peer may still be delaying its ACK.
 */
static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 min_rto_us = peer_max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
			      sizeof(min_rto_us));
}

/* ACTIVE_ESTABLISHED_CB: record what the SYNACK carried, create sk
 * storage, and arrange option resend if the peer asked for it
 * (syncookie mode on the passive side).
 */
static int handle_active_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {
		.active = true,
	};
	int err;

	err = load_option(skops, &active_estab_in, false);
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
						OPTION_RESEND);
	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
					      &init_stg,
					      BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (init_stg.resend_syn)
		/* Don't clear the write_hdr cb now because
		 * the ACK may get lost and retransmit may
		 * be needed.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn if this
		 * resend_syn option has received by the peer.
		 *
		 * The header option will be resent until a valid
		 * packet is received at handle_parse_hdr()
		 * and all hdr cb flags will be cleared in
		 * handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
	else if (!active_fin_out.flags)
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);

	if (active_syn_out.max_delack_ms) {
		err = set_delack_max(skops, active_syn_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (active_estab_in.max_delack_ms) {
		err = set_rto_min(skops, active_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* PASSIVE_ESTABLISHED_CB: recover the active side's option from the
 * saved SYN (or from the ACK in syncookie mode), create sk storage,
 * and handle the fastopen case where the SYNACK is not yet acked.
 */
static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {};
	struct tcphdr *th;
	int err;

	err = load_option(skops, &passive_estab_in, true);
	if (err == -ENOENT) {
		/* saved_syn is not found. It was in syncookie mode.
		 * We have asked the active side to resend the options
		 * in ACK, so try to find the bpf_test_option from ACK now.
		 */
		err = load_option(skops, &passive_estab_in, false);
		init_stg.syncookie = true;
	}

	/* ENOMSG: The bpf_test_option is not found which is fine.
	 * Bail out now for all other errors.
	 */
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Fastopen */

		/* Cannot clear cb_flags to stop write_hdr cb.
		 * synack is not sent yet for fast open.
		 * Even it was, the synack may need to be retransmitted.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn
		 * if synack has reached the peer.
		 * All cb_flags will be cleared in handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
		init_stg.fastopen = true;
	} else if (!passive_fin_out.flags) {
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);
	}

	if (!skops->sk ||
	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
				BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (passive_synack_out.max_delack_ms) {
		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (passive_estab_in.max_delack_ms) {
		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* PARSE_HDR_OPT_CB: a valid post-3WHS packet arrived, so stop any
 * resend/fastopen retransmission handling, and capture the FIN option
 * when the packet carries a FIN.
 */
static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;
	struct tcphdr *th;

	if (!skops->sk)
		RET_CG_ERR(0);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->resend_syn || hdr_stg->fastopen)
		/* The PARSE_ALL_HDR cb flag was turned on
		 * to ensure that the previously written
		 * options have reached the peer.
		 * Those previously written option includes:
		 *     - Active side: resend_syn in ACK during syncookie
		 *       or
		 *     - Passive side: SYNACK during fastopen
		 *
		 * A valid packet has been received here after
		 * the 3WHS, so the PARSE_ALL_HDR cb flag
		 * can be cleared now.
		 */
		clear_parse_all_hdr_cb_flags(skops);

	if (hdr_stg->resend_syn && !active_fin_out.flags)
		/* Active side resent the syn option in ACK
		 * because the server was in syncookie mode.
		 * A valid packet has been received, so
		 * clear header cb flags if there is no
		 * more option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (hdr_stg->fastopen && !passive_fin_out.flags)
		/* Passive side was in fastopen.
		 * A valid packet has been received, so
		 * the SYNACK has reached the peer.
		 * Clear header cb flags if there is no more
		 * option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (th->fin) {
		struct bpf_test_option *fin_opt;
		int err;

		if (hdr_stg->active)
			fin_opt = &active_fin_in;
		else
			fin_opt = &passive_fin_in;

		err = load_option(skops, fin_opt, false);
		if (err && err != -ENOMSG)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* Program entry: enable TCP_SAVE_SYN + header-option callbacks on
 * listen/connect, then dispatch the option-related sock_ops callbacks.
 */
SEC("sockops/estab")
int estab(struct bpf_sock_ops *skops)
{
	int true_val = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &true_val, sizeof(true_val));
		set_hdr_cb_flags(skops);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		set_hdr_cb_flags(skops);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		return handle_active_estab(skops);
	}

	return CG_OK;
}

char _license[] SEC("license") = "GPL";