1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2020 Cloudflare 3 /* 4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets. 5 * Covers: 6 * 1. BPF map operations - bpf_map_{update,lookup delete}_elem 7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map 8 * 3. BPF reuseport helper - bpf_sk_select_reuseport 9 */ 10 11 #include <linux/compiler.h> 12 #include <errno.h> 13 #include <error.h> 14 #include <limits.h> 15 #include <netinet/in.h> 16 #include <pthread.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include <sys/select.h> 20 #include <unistd.h> 21 22 #include <bpf/bpf.h> 23 #include <bpf/libbpf.h> 24 25 #include "bpf_util.h" 26 #include "test_progs.h" 27 #include "test_sockmap_listen.skel.h" 28 29 #define IO_TIMEOUT_SEC 30 30 #define MAX_STRERR_LEN 256 31 #define MAX_TEST_NAME 80 32 33 #define _FAIL(errnum, fmt...) \ 34 ({ \ 35 error_at_line(0, (errnum), __func__, __LINE__, fmt); \ 36 CHECK_FAIL(true); \ 37 }) 38 #define FAIL(fmt...) _FAIL(0, fmt) 39 #define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) 40 #define FAIL_LIBBPF(err, msg) \ 41 ({ \ 42 char __buf[MAX_STRERR_LEN]; \ 43 libbpf_strerror((err), __buf, sizeof(__buf)); \ 44 FAIL("%s: %s", (msg), __buf); \ 45 }) 46 47 /* Wrappers that fail the test on error and report it. 
*/ 48 49 #define xaccept_nonblock(fd, addr, len) \ 50 ({ \ 51 int __ret = \ 52 accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ 53 if (__ret == -1) \ 54 FAIL_ERRNO("accept"); \ 55 __ret; \ 56 }) 57 58 #define xbind(fd, addr, len) \ 59 ({ \ 60 int __ret = bind((fd), (addr), (len)); \ 61 if (__ret == -1) \ 62 FAIL_ERRNO("bind"); \ 63 __ret; \ 64 }) 65 66 #define xclose(fd) \ 67 ({ \ 68 int __ret = close((fd)); \ 69 if (__ret == -1) \ 70 FAIL_ERRNO("close"); \ 71 __ret; \ 72 }) 73 74 #define xconnect(fd, addr, len) \ 75 ({ \ 76 int __ret = connect((fd), (addr), (len)); \ 77 if (__ret == -1) \ 78 FAIL_ERRNO("connect"); \ 79 __ret; \ 80 }) 81 82 #define xgetsockname(fd, addr, len) \ 83 ({ \ 84 int __ret = getsockname((fd), (addr), (len)); \ 85 if (__ret == -1) \ 86 FAIL_ERRNO("getsockname"); \ 87 __ret; \ 88 }) 89 90 #define xgetsockopt(fd, level, name, val, len) \ 91 ({ \ 92 int __ret = getsockopt((fd), (level), (name), (val), (len)); \ 93 if (__ret == -1) \ 94 FAIL_ERRNO("getsockopt(" #name ")"); \ 95 __ret; \ 96 }) 97 98 #define xlisten(fd, backlog) \ 99 ({ \ 100 int __ret = listen((fd), (backlog)); \ 101 if (__ret == -1) \ 102 FAIL_ERRNO("listen"); \ 103 __ret; \ 104 }) 105 106 #define xsetsockopt(fd, level, name, val, len) \ 107 ({ \ 108 int __ret = setsockopt((fd), (level), (name), (val), (len)); \ 109 if (__ret == -1) \ 110 FAIL_ERRNO("setsockopt(" #name ")"); \ 111 __ret; \ 112 }) 113 114 #define xsend(fd, buf, len, flags) \ 115 ({ \ 116 ssize_t __ret = send((fd), (buf), (len), (flags)); \ 117 if (__ret == -1) \ 118 FAIL_ERRNO("send"); \ 119 __ret; \ 120 }) 121 122 #define xrecv_nonblock(fd, buf, len, flags) \ 123 ({ \ 124 ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ 125 IO_TIMEOUT_SEC); \ 126 if (__ret == -1) \ 127 FAIL_ERRNO("recv"); \ 128 __ret; \ 129 }) 130 131 #define xsocket(family, sotype, flags) \ 132 ({ \ 133 int __ret = socket(family, sotype, flags); \ 134 if (__ret == -1) \ 135 FAIL_ERRNO("socket"); \ 136 __ret; \ 137 }) 138 139 
#define xbpf_map_delete_elem(fd, key) \ 140 ({ \ 141 int __ret = bpf_map_delete_elem((fd), (key)); \ 142 if (__ret < 0) \ 143 FAIL_ERRNO("map_delete"); \ 144 __ret; \ 145 }) 146 147 #define xbpf_map_lookup_elem(fd, key, val) \ 148 ({ \ 149 int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ 150 if (__ret < 0) \ 151 FAIL_ERRNO("map_lookup"); \ 152 __ret; \ 153 }) 154 155 #define xbpf_map_update_elem(fd, key, val, flags) \ 156 ({ \ 157 int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ 158 if (__ret < 0) \ 159 FAIL_ERRNO("map_update"); \ 160 __ret; \ 161 }) 162 163 #define xbpf_prog_attach(prog, target, type, flags) \ 164 ({ \ 165 int __ret = \ 166 bpf_prog_attach((prog), (target), (type), (flags)); \ 167 if (__ret < 0) \ 168 FAIL_ERRNO("prog_attach(" #type ")"); \ 169 __ret; \ 170 }) 171 172 #define xbpf_prog_detach2(prog, target, type) \ 173 ({ \ 174 int __ret = bpf_prog_detach2((prog), (target), (type)); \ 175 if (__ret < 0) \ 176 FAIL_ERRNO("prog_detach2(" #type ")"); \ 177 __ret; \ 178 }) 179 180 #define xpthread_create(thread, attr, func, arg) \ 181 ({ \ 182 int __ret = pthread_create((thread), (attr), (func), (arg)); \ 183 errno = __ret; \ 184 if (__ret) \ 185 FAIL_ERRNO("pthread_create"); \ 186 __ret; \ 187 }) 188 189 #define xpthread_join(thread, retval) \ 190 ({ \ 191 int __ret = pthread_join((thread), (retval)); \ 192 errno = __ret; \ 193 if (__ret) \ 194 FAIL_ERRNO("pthread_join"); \ 195 __ret; \ 196 }) 197 198 static int poll_read(int fd, unsigned int timeout_sec) 199 { 200 struct timeval timeout = { .tv_sec = timeout_sec }; 201 fd_set rfds; 202 int r; 203 204 FD_ZERO(&rfds); 205 FD_SET(fd, &rfds); 206 207 r = select(fd + 1, &rfds, NULL, NULL, &timeout); 208 if (r == 0) 209 errno = ETIME; 210 211 return r == 1 ? 
0 : -1; 212 } 213 214 static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, 215 unsigned int timeout_sec) 216 { 217 if (poll_read(fd, timeout_sec)) 218 return -1; 219 220 return accept(fd, addr, len); 221 } 222 223 static int recv_timeout(int fd, void *buf, size_t len, int flags, 224 unsigned int timeout_sec) 225 { 226 if (poll_read(fd, timeout_sec)) 227 return -1; 228 229 return recv(fd, buf, len, flags); 230 } 231 232 static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) 233 { 234 struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); 235 236 addr4->sin_family = AF_INET; 237 addr4->sin_port = 0; 238 addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 239 *len = sizeof(*addr4); 240 } 241 242 static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len) 243 { 244 struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); 245 246 addr6->sin6_family = AF_INET6; 247 addr6->sin6_port = 0; 248 addr6->sin6_addr = in6addr_loopback; 249 *len = sizeof(*addr6); 250 } 251 252 static void init_addr_loopback(int family, struct sockaddr_storage *ss, 253 socklen_t *len) 254 { 255 switch (family) { 256 case AF_INET: 257 init_addr_loopback4(ss, len); 258 return; 259 case AF_INET6: 260 init_addr_loopback6(ss, len); 261 return; 262 default: 263 FAIL("unsupported address family %d", family); 264 } 265 } 266 267 static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) 268 { 269 return (struct sockaddr *)ss; 270 } 271 272 static int enable_reuseport(int s, int progfd) 273 { 274 int err, one = 1; 275 276 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); 277 if (err) 278 return -1; 279 err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, 280 sizeof(progfd)); 281 if (err) 282 return -1; 283 284 return 0; 285 } 286 287 static int socket_loopback_reuseport(int family, int sotype, int progfd) 288 { 289 struct sockaddr_storage addr; 290 socklen_t len; 291 int err, s; 292 293 
init_addr_loopback(family, &addr, &len); 294 295 s = xsocket(family, sotype, 0); 296 if (s == -1) 297 return -1; 298 299 if (progfd >= 0) 300 enable_reuseport(s, progfd); 301 302 err = xbind(s, sockaddr(&addr), len); 303 if (err) 304 goto close; 305 306 if (sotype & SOCK_DGRAM) 307 return s; 308 309 err = xlisten(s, SOMAXCONN); 310 if (err) 311 goto close; 312 313 return s; 314 close: 315 xclose(s); 316 return -1; 317 } 318 319 static int socket_loopback(int family, int sotype) 320 { 321 return socket_loopback_reuseport(family, sotype, -1); 322 } 323 324 static void test_insert_invalid(int family, int sotype, int mapfd) 325 { 326 u32 key = 0; 327 u64 value; 328 int err; 329 330 value = -1; 331 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 332 if (!err || errno != EINVAL) 333 FAIL_ERRNO("map_update: expected EINVAL"); 334 335 value = INT_MAX; 336 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 337 if (!err || errno != EBADF) 338 FAIL_ERRNO("map_update: expected EBADF"); 339 } 340 341 static void test_insert_opened(int family, int sotype, int mapfd) 342 { 343 u32 key = 0; 344 u64 value; 345 int err, s; 346 347 s = xsocket(family, sotype, 0); 348 if (s == -1) 349 return; 350 351 errno = 0; 352 value = s; 353 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 354 if (sotype == SOCK_STREAM) { 355 if (!err || errno != EOPNOTSUPP) 356 FAIL_ERRNO("map_update: expected EOPNOTSUPP"); 357 } else if (err) 358 FAIL_ERRNO("map_update: expected success"); 359 xclose(s); 360 } 361 362 static void test_insert_bound(int family, int sotype, int mapfd) 363 { 364 struct sockaddr_storage addr; 365 socklen_t len; 366 u32 key = 0; 367 u64 value; 368 int err, s; 369 370 init_addr_loopback(family, &addr, &len); 371 372 s = xsocket(family, sotype, 0); 373 if (s == -1) 374 return; 375 376 err = xbind(s, sockaddr(&addr), len); 377 if (err) 378 goto close; 379 380 errno = 0; 381 value = s; 382 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 383 
if (!err || errno != EOPNOTSUPP) 384 FAIL_ERRNO("map_update: expected EOPNOTSUPP"); 385 close: 386 xclose(s); 387 } 388 389 static void test_insert(int family, int sotype, int mapfd) 390 { 391 u64 value; 392 u32 key; 393 int s; 394 395 s = socket_loopback(family, sotype); 396 if (s < 0) 397 return; 398 399 key = 0; 400 value = s; 401 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 402 xclose(s); 403 } 404 405 static void test_delete_after_insert(int family, int sotype, int mapfd) 406 { 407 u64 value; 408 u32 key; 409 int s; 410 411 s = socket_loopback(family, sotype); 412 if (s < 0) 413 return; 414 415 key = 0; 416 value = s; 417 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 418 xbpf_map_delete_elem(mapfd, &key); 419 xclose(s); 420 } 421 422 static void test_delete_after_close(int family, int sotype, int mapfd) 423 { 424 int err, s; 425 u64 value; 426 u32 key; 427 428 s = socket_loopback(family, sotype); 429 if (s < 0) 430 return; 431 432 key = 0; 433 value = s; 434 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 435 436 xclose(s); 437 438 errno = 0; 439 err = bpf_map_delete_elem(mapfd, &key); 440 if (!err || (errno != EINVAL && errno != ENOENT)) 441 /* SOCKMAP and SOCKHASH return different error codes */ 442 FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); 443 } 444 445 static void test_lookup_after_insert(int family, int sotype, int mapfd) 446 { 447 u64 cookie, value; 448 socklen_t len; 449 u32 key; 450 int s; 451 452 s = socket_loopback(family, sotype); 453 if (s < 0) 454 return; 455 456 key = 0; 457 value = s; 458 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 459 460 len = sizeof(cookie); 461 xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len); 462 463 xbpf_map_lookup_elem(mapfd, &key, &value); 464 465 if (value != cookie) { 466 FAIL("map_lookup: have %#llx, want %#llx", 467 (unsigned long long)value, (unsigned long long)cookie); 468 } 469 470 xclose(s); 471 } 472 473 static void test_lookup_after_delete(int family, int 
sotype, int mapfd) 474 { 475 int err, s; 476 u64 value; 477 u32 key; 478 479 s = socket_loopback(family, sotype); 480 if (s < 0) 481 return; 482 483 key = 0; 484 value = s; 485 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 486 xbpf_map_delete_elem(mapfd, &key); 487 488 errno = 0; 489 err = bpf_map_lookup_elem(mapfd, &key, &value); 490 if (!err || errno != ENOENT) 491 FAIL_ERRNO("map_lookup: expected ENOENT"); 492 493 xclose(s); 494 } 495 496 static void test_lookup_32_bit_value(int family, int sotype, int mapfd) 497 { 498 u32 key, value32; 499 int err, s; 500 501 s = socket_loopback(family, sotype); 502 if (s < 0) 503 return; 504 505 mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key), 506 sizeof(value32), 1, NULL); 507 if (mapfd < 0) { 508 FAIL_ERRNO("map_create"); 509 goto close; 510 } 511 512 key = 0; 513 value32 = s; 514 xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST); 515 516 errno = 0; 517 err = bpf_map_lookup_elem(mapfd, &key, &value32); 518 if (!err || errno != ENOSPC) 519 FAIL_ERRNO("map_lookup: expected ENOSPC"); 520 521 xclose(mapfd); 522 close: 523 xclose(s); 524 } 525 526 static void test_update_existing(int family, int sotype, int mapfd) 527 { 528 int s1, s2; 529 u64 value; 530 u32 key; 531 532 s1 = socket_loopback(family, sotype); 533 if (s1 < 0) 534 return; 535 536 s2 = socket_loopback(family, sotype); 537 if (s2 < 0) 538 goto close_s1; 539 540 key = 0; 541 value = s1; 542 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 543 544 value = s2; 545 xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST); 546 xclose(s2); 547 close_s1: 548 xclose(s1); 549 } 550 551 /* Exercise the code path where we destroy child sockets that never 552 * got accept()'ed, aka orphans, when parent socket gets closed. 
553 */ 554 static void test_destroy_orphan_child(int family, int sotype, int mapfd) 555 { 556 struct sockaddr_storage addr; 557 socklen_t len; 558 int err, s, c; 559 u64 value; 560 u32 key; 561 562 s = socket_loopback(family, sotype); 563 if (s < 0) 564 return; 565 566 len = sizeof(addr); 567 err = xgetsockname(s, sockaddr(&addr), &len); 568 if (err) 569 goto close_srv; 570 571 key = 0; 572 value = s; 573 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 574 575 c = xsocket(family, sotype, 0); 576 if (c == -1) 577 goto close_srv; 578 579 xconnect(c, sockaddr(&addr), len); 580 xclose(c); 581 close_srv: 582 xclose(s); 583 } 584 585 /* Perform a passive open after removing listening socket from SOCKMAP 586 * to ensure that callbacks get restored properly. 587 */ 588 static void test_clone_after_delete(int family, int sotype, int mapfd) 589 { 590 struct sockaddr_storage addr; 591 socklen_t len; 592 int err, s, c; 593 u64 value; 594 u32 key; 595 596 s = socket_loopback(family, sotype); 597 if (s < 0) 598 return; 599 600 len = sizeof(addr); 601 err = xgetsockname(s, sockaddr(&addr), &len); 602 if (err) 603 goto close_srv; 604 605 key = 0; 606 value = s; 607 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 608 xbpf_map_delete_elem(mapfd, &key); 609 610 c = xsocket(family, sotype, 0); 611 if (c < 0) 612 goto close_srv; 613 614 xconnect(c, sockaddr(&addr), len); 615 xclose(c); 616 close_srv: 617 xclose(s); 618 } 619 620 /* Check that child socket that got created while parent was in a 621 * SOCKMAP, but got accept()'ed only after the parent has been removed 622 * from SOCKMAP, gets cloned without parent psock state or callbacks. 
623 */ 624 static void test_accept_after_delete(int family, int sotype, int mapfd) 625 { 626 struct sockaddr_storage addr; 627 const u32 zero = 0; 628 int err, s, c, p; 629 socklen_t len; 630 u64 value; 631 632 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 633 if (s == -1) 634 return; 635 636 len = sizeof(addr); 637 err = xgetsockname(s, sockaddr(&addr), &len); 638 if (err) 639 goto close_srv; 640 641 value = s; 642 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 643 if (err) 644 goto close_srv; 645 646 c = xsocket(family, sotype, 0); 647 if (c == -1) 648 goto close_srv; 649 650 /* Create child while parent is in sockmap */ 651 err = xconnect(c, sockaddr(&addr), len); 652 if (err) 653 goto close_cli; 654 655 /* Remove parent from sockmap */ 656 err = xbpf_map_delete_elem(mapfd, &zero); 657 if (err) 658 goto close_cli; 659 660 p = xaccept_nonblock(s, NULL, NULL); 661 if (p == -1) 662 goto close_cli; 663 664 /* Check that child sk_user_data is not set */ 665 value = p; 666 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 667 668 xclose(p); 669 close_cli: 670 xclose(c); 671 close_srv: 672 xclose(s); 673 } 674 675 /* Check that child socket that got created and accepted while parent 676 * was in a SOCKMAP is cloned without parent psock state or callbacks. 
677 */ 678 static void test_accept_before_delete(int family, int sotype, int mapfd) 679 { 680 struct sockaddr_storage addr; 681 const u32 zero = 0, one = 1; 682 int err, s, c, p; 683 socklen_t len; 684 u64 value; 685 686 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 687 if (s == -1) 688 return; 689 690 len = sizeof(addr); 691 err = xgetsockname(s, sockaddr(&addr), &len); 692 if (err) 693 goto close_srv; 694 695 value = s; 696 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 697 if (err) 698 goto close_srv; 699 700 c = xsocket(family, sotype, 0); 701 if (c == -1) 702 goto close_srv; 703 704 /* Create & accept child while parent is in sockmap */ 705 err = xconnect(c, sockaddr(&addr), len); 706 if (err) 707 goto close_cli; 708 709 p = xaccept_nonblock(s, NULL, NULL); 710 if (p == -1) 711 goto close_cli; 712 713 /* Check that child sk_user_data is not set */ 714 value = p; 715 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST); 716 717 xclose(p); 718 close_cli: 719 xclose(c); 720 close_srv: 721 xclose(s); 722 } 723 724 struct connect_accept_ctx { 725 int sockfd; 726 unsigned int done; 727 unsigned int nr_iter; 728 }; 729 730 static bool is_thread_done(struct connect_accept_ctx *ctx) 731 { 732 return READ_ONCE(ctx->done); 733 } 734 735 static void *connect_accept_thread(void *arg) 736 { 737 struct connect_accept_ctx *ctx = arg; 738 struct sockaddr_storage addr; 739 int family, socktype; 740 socklen_t len; 741 int err, i, s; 742 743 s = ctx->sockfd; 744 745 len = sizeof(addr); 746 err = xgetsockname(s, sockaddr(&addr), &len); 747 if (err) 748 goto done; 749 750 len = sizeof(family); 751 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len); 752 if (err) 753 goto done; 754 755 len = sizeof(socktype); 756 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len); 757 if (err) 758 goto done; 759 760 for (i = 0; i < ctx->nr_iter; i++) { 761 int c, p; 762 763 c = xsocket(family, socktype, 0); 764 if (c < 0) 765 break; 766 767 err = xconnect(c, 
(struct sockaddr *)&addr, sizeof(addr)); 768 if (err) { 769 xclose(c); 770 break; 771 } 772 773 p = xaccept_nonblock(s, NULL, NULL); 774 if (p < 0) { 775 xclose(c); 776 break; 777 } 778 779 xclose(p); 780 xclose(c); 781 } 782 done: 783 WRITE_ONCE(ctx->done, 1); 784 return NULL; 785 } 786 787 static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) 788 { 789 struct connect_accept_ctx ctx = { 0 }; 790 struct sockaddr_storage addr; 791 socklen_t len; 792 u32 zero = 0; 793 pthread_t t; 794 int err, s; 795 u64 value; 796 797 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 798 if (s < 0) 799 return; 800 801 len = sizeof(addr); 802 err = xgetsockname(s, sockaddr(&addr), &len); 803 if (err) 804 goto close; 805 806 ctx.sockfd = s; 807 ctx.nr_iter = 1000; 808 809 err = xpthread_create(&t, NULL, connect_accept_thread, &ctx); 810 if (err) 811 goto close; 812 813 value = s; 814 while (!is_thread_done(&ctx)) { 815 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 816 if (err) 817 break; 818 819 err = xbpf_map_delete_elem(mapfd, &zero); 820 if (err) 821 break; 822 } 823 824 xpthread_join(t, NULL); 825 close: 826 xclose(s); 827 } 828 829 static void *listen_thread(void *arg) 830 { 831 struct sockaddr unspec = { AF_UNSPEC }; 832 struct connect_accept_ctx *ctx = arg; 833 int err, i, s; 834 835 s = ctx->sockfd; 836 837 for (i = 0; i < ctx->nr_iter; i++) { 838 err = xlisten(s, 1); 839 if (err) 840 break; 841 err = xconnect(s, &unspec, sizeof(unspec)); 842 if (err) 843 break; 844 } 845 846 WRITE_ONCE(ctx->done, 1); 847 return NULL; 848 } 849 850 static void test_race_insert_listen(int family, int socktype, int mapfd) 851 { 852 struct connect_accept_ctx ctx = { 0 }; 853 const u32 zero = 0; 854 const int one = 1; 855 pthread_t t; 856 int err, s; 857 u64 value; 858 859 s = xsocket(family, socktype, 0); 860 if (s < 0) 861 return; 862 863 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); 864 if (err) 865 goto close; 866 867 ctx.sockfd = 
s; 868 ctx.nr_iter = 10000; 869 870 err = pthread_create(&t, NULL, listen_thread, &ctx); 871 if (err) 872 goto close; 873 874 value = s; 875 while (!is_thread_done(&ctx)) { 876 err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 877 /* Expecting EOPNOTSUPP before listen() */ 878 if (err && errno != EOPNOTSUPP) { 879 FAIL_ERRNO("map_update"); 880 break; 881 } 882 883 err = bpf_map_delete_elem(mapfd, &zero); 884 /* Expecting no entry after unhash on connect(AF_UNSPEC) */ 885 if (err && errno != EINVAL && errno != ENOENT) { 886 FAIL_ERRNO("map_delete"); 887 break; 888 } 889 } 890 891 xpthread_join(t, NULL); 892 close: 893 xclose(s); 894 } 895 896 static void zero_verdict_count(int mapfd) 897 { 898 unsigned int zero = 0; 899 int key; 900 901 key = SK_DROP; 902 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); 903 key = SK_PASS; 904 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); 905 } 906 907 enum redir_mode { 908 REDIR_INGRESS, 909 REDIR_EGRESS, 910 }; 911 912 static const char *redir_mode_str(enum redir_mode mode) 913 { 914 switch (mode) { 915 case REDIR_INGRESS: 916 return "ingress"; 917 case REDIR_EGRESS: 918 return "egress"; 919 default: 920 return "unknown"; 921 } 922 } 923 924 static int add_to_sockmap(int sock_mapfd, int fd1, int fd2) 925 { 926 u64 value; 927 u32 key; 928 int err; 929 930 key = 0; 931 value = fd1; 932 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); 933 if (err) 934 return err; 935 936 key = 1; 937 value = fd2; 938 return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); 939 } 940 941 static void redir_to_connected(int family, int sotype, int sock_mapfd, 942 int verd_mapfd, enum redir_mode mode) 943 { 944 const char *log_prefix = redir_mode_str(mode); 945 struct sockaddr_storage addr; 946 int s, c0, c1, p0, p1; 947 unsigned int pass; 948 socklen_t len; 949 int err, n; 950 u32 key; 951 char b; 952 953 zero_verdict_count(verd_mapfd); 954 955 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 956 if (s 
< 0) 957 return; 958 959 len = sizeof(addr); 960 err = xgetsockname(s, sockaddr(&addr), &len); 961 if (err) 962 goto close_srv; 963 964 c0 = xsocket(family, sotype, 0); 965 if (c0 < 0) 966 goto close_srv; 967 err = xconnect(c0, sockaddr(&addr), len); 968 if (err) 969 goto close_cli0; 970 971 p0 = xaccept_nonblock(s, NULL, NULL); 972 if (p0 < 0) 973 goto close_cli0; 974 975 c1 = xsocket(family, sotype, 0); 976 if (c1 < 0) 977 goto close_peer0; 978 err = xconnect(c1, sockaddr(&addr), len); 979 if (err) 980 goto close_cli1; 981 982 p1 = xaccept_nonblock(s, NULL, NULL); 983 if (p1 < 0) 984 goto close_cli1; 985 986 err = add_to_sockmap(sock_mapfd, p0, p1); 987 if (err) 988 goto close_peer1; 989 990 n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1); 991 if (n < 0) 992 FAIL_ERRNO("%s: write", log_prefix); 993 if (n == 0) 994 FAIL("%s: incomplete write", log_prefix); 995 if (n < 1) 996 goto close_peer1; 997 998 key = SK_PASS; 999 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 1000 if (err) 1001 goto close_peer1; 1002 if (pass != 1) 1003 FAIL("%s: want pass count 1, have %d", log_prefix, pass); 1004 n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); 1005 if (n < 0) 1006 FAIL_ERRNO("%s: recv_timeout", log_prefix); 1007 if (n == 0) 1008 FAIL("%s: incomplete recv", log_prefix); 1009 1010 close_peer1: 1011 xclose(p1); 1012 close_cli1: 1013 xclose(c1); 1014 close_peer0: 1015 xclose(p0); 1016 close_cli0: 1017 xclose(c0); 1018 close_srv: 1019 xclose(s); 1020 } 1021 1022 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, 1023 struct bpf_map *inner_map, int family, 1024 int sotype) 1025 { 1026 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 1027 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 1028 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1029 int sock_map = bpf_map__fd(inner_map); 1030 int err; 1031 1032 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 1033 if (err) 1034 return; 1035 err = 
xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 1036 if (err) 1037 goto detach; 1038 1039 redir_to_connected(family, sotype, sock_map, verdict_map, 1040 REDIR_INGRESS); 1041 1042 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 1043 detach: 1044 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 1045 } 1046 1047 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel, 1048 struct bpf_map *inner_map, int family, 1049 int sotype) 1050 { 1051 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 1052 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1053 int sock_map = bpf_map__fd(inner_map); 1054 int err; 1055 1056 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); 1057 if (err) 1058 return; 1059 1060 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 1061 1062 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); 1063 } 1064 1065 static void redir_to_listening(int family, int sotype, int sock_mapfd, 1066 int verd_mapfd, enum redir_mode mode) 1067 { 1068 const char *log_prefix = redir_mode_str(mode); 1069 struct sockaddr_storage addr; 1070 int s, c, p, err, n; 1071 unsigned int drop; 1072 socklen_t len; 1073 u32 key; 1074 1075 zero_verdict_count(verd_mapfd); 1076 1077 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 1078 if (s < 0) 1079 return; 1080 1081 len = sizeof(addr); 1082 err = xgetsockname(s, sockaddr(&addr), &len); 1083 if (err) 1084 goto close_srv; 1085 1086 c = xsocket(family, sotype, 0); 1087 if (c < 0) 1088 goto close_srv; 1089 err = xconnect(c, sockaddr(&addr), len); 1090 if (err) 1091 goto close_cli; 1092 1093 p = xaccept_nonblock(s, NULL, NULL); 1094 if (p < 0) 1095 goto close_cli; 1096 1097 err = add_to_sockmap(sock_mapfd, s, p); 1098 if (err) 1099 goto close_peer; 1100 1101 n = write(mode == REDIR_INGRESS ? 
c : p, "a", 1); 1102 if (n < 0 && errno != EACCES) 1103 FAIL_ERRNO("%s: write", log_prefix); 1104 if (n == 0) 1105 FAIL("%s: incomplete write", log_prefix); 1106 if (n < 1) 1107 goto close_peer; 1108 1109 key = SK_DROP; 1110 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop); 1111 if (err) 1112 goto close_peer; 1113 if (drop != 1) 1114 FAIL("%s: want drop count 1, have %d", log_prefix, drop); 1115 1116 close_peer: 1117 xclose(p); 1118 close_cli: 1119 xclose(c); 1120 close_srv: 1121 xclose(s); 1122 } 1123 1124 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, 1125 struct bpf_map *inner_map, int family, 1126 int sotype) 1127 { 1128 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 1129 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 1130 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1131 int sock_map = bpf_map__fd(inner_map); 1132 int err; 1133 1134 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 1135 if (err) 1136 return; 1137 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 1138 if (err) 1139 goto detach; 1140 1141 redir_to_listening(family, sotype, sock_map, verdict_map, 1142 REDIR_INGRESS); 1143 1144 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 1145 detach: 1146 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 1147 } 1148 1149 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, 1150 struct bpf_map *inner_map, int family, 1151 int sotype) 1152 { 1153 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 1154 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1155 int sock_map = bpf_map__fd(inner_map); 1156 int err; 1157 1158 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); 1159 if (err) 1160 return; 1161 1162 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 1163 1164 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); 1165 } 1166 1167 static 
void test_reuseport_select_listening(int family, int sotype,
				     int sock_map, int verd_map,
				     int reuseport_prog)
{
	struct sockaddr_storage addr;
	unsigned int pass;
	int s, c, err;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
				      reuseport_prog);
	if (s < 0)
		return;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	c = xsocket(family, sotype, 0);
	if (c < 0)
		goto close_srv;
	err = xconnect(c, sockaddr(&addr), len);
	if (err)
		goto close_cli;

	if (sotype == SOCK_STREAM) {
		int p;

		p = xaccept_nonblock(s, NULL, NULL);
		if (p < 0)
			goto close_cli;
		xclose(p);
	} else {
		/* Datagram: exchange one byte instead of accepting */
		char b = 'a';
		ssize_t n;

		n = xsend(c, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli;

		n = xrecv_nonblock(s, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli;
	}

	key = SK_PASS;
	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
	if (err)
		goto close_cli;
	if (pass != 1)
		FAIL("want pass count 1, have %u", pass);

close_cli:
	xclose(c);
close_srv:
	xclose(s);
}

/* Selecting a connected (non-listening) socket from the reuseport
 * program must be rejected, and the connection refused.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		/* Datagram: connect a second socket back to c0 */
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_cli0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_cli0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		/* Datagram connect() succeeds; the refusal only shows
		 * up when we try to exchange data.
		 */
		char b = 'a';
		ssize_t n;

		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %u", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}

/* Check that redirecting across
reuseport groups is not allowed. */ 1342 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, 1343 int verd_map, int reuseport_prog) 1344 { 1345 struct sockaddr_storage addr; 1346 int s1, s2, c, err; 1347 unsigned int drop; 1348 socklen_t len; 1349 u32 key; 1350 1351 zero_verdict_count(verd_map); 1352 1353 /* Create two listeners, each in its own reuseport group */ 1354 s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1355 if (s1 < 0) 1356 return; 1357 1358 s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1359 if (s2 < 0) 1360 goto close_srv1; 1361 1362 err = add_to_sockmap(sock_map, s1, s2); 1363 if (err) 1364 goto close_srv2; 1365 1366 /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ 1367 len = sizeof(addr); 1368 err = xgetsockname(s2, sockaddr(&addr), &len); 1369 if (err) 1370 goto close_srv2; 1371 1372 c = xsocket(family, sotype, 0); 1373 if (c < 0) 1374 goto close_srv2; 1375 1376 err = connect(c, sockaddr(&addr), len); 1377 if (sotype == SOCK_DGRAM) { 1378 char b = 'a'; 1379 ssize_t n; 1380 1381 n = xsend(c, &b, sizeof(b), 0); 1382 if (n == -1) 1383 goto close_cli; 1384 1385 n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1386 err = n == -1; 1387 } 1388 if (!err || errno != ECONNREFUSED) { 1389 FAIL_ERRNO("connect: expected ECONNREFUSED"); 1390 goto close_cli; 1391 } 1392 1393 /* Expect drop, can't redirect outside of reuseport group */ 1394 key = SK_DROP; 1395 err = xbpf_map_lookup_elem(verd_map, &key, &drop); 1396 if (err) 1397 goto close_cli; 1398 if (drop != 1) 1399 FAIL("want drop count 1, have %d", drop); 1400 1401 close_cli: 1402 xclose(c); 1403 close_srv2: 1404 xclose(s2); 1405 close_srv1: 1406 xclose(s1); 1407 } 1408 1409 #define TEST(fn, ...) 
	{ \
		fn, #fn, __VA_ARGS__ \
	}

/*
 * Delete every possible key so that each subtest starts with an empty
 * SOCKMAP/SOCKHASH. Delete may legitimately fail with EINVAL or ENOENT
 * (key absent); any other error is a test failure.
 */
static void test_ops_cleanup(const struct bpf_map *map)
{
	const struct bpf_map_def *def;
	int err, mapfd;
	u32 key;

	def = bpf_map__def(map);
	mapfd = bpf_map__fd(map);

	for (key = 0; key < def->max_entries; key++) {
		err = bpf_map_delete_elem(mapfd, &key);
		if (err && errno != EINVAL && errno != ENOENT)
			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
	}
}

/* Human-readable address family label for subtest names */
static const char *family_str(sa_family_t family)
{
	switch (family) {
	case AF_INET:
		return "IPv4";
	case AF_INET6:
		return "IPv6";
	case AF_UNIX:
		return "Unix";
	default:
		return "unknown";
	}
}

/* "sockmap"/"sockhash" label for subtest names; "invalid" if map def is bad */
static const char *map_type_str(const struct bpf_map *map)
{
	const struct bpf_map_def *def;

	def = bpf_map__def(map);
	if (IS_ERR(def))
		return "invalid";

	switch (def->type) {
	case BPF_MAP_TYPE_SOCKMAP:
		return "sockmap";
	case BPF_MAP_TYPE_SOCKHASH:
		return "sockhash";
	default:
		return "unknown";
	}
}

/* Socket-type label for subtest names */
static const char *sotype_str(int sotype)
{
	switch (sotype) {
	case SOCK_DGRAM:
		return "UDP";
	case SOCK_STREAM:
		return "TCP";
	default:
		return "unknown";
	}
}

/*
 * Run the table of map-operation tests against @map. Entries with a
 * non-zero .sotype only run when it matches the requested socket type.
 * Note: @skel is unused here; kept for a uniform test-runner signature.
 */
static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
		     int family, int sotype)
{
	const struct op_test {
		void (*fn)(int family, int sotype, int mapfd);
		const char *name;
		int sotype; /* 0 = run for any socket type */
	} tests[] = {
		/* insert */
		TEST(test_insert_invalid),
		TEST(test_insert_opened),
		TEST(test_insert_bound, SOCK_STREAM),
		TEST(test_insert),
		/* delete */
		TEST(test_delete_after_insert),
		TEST(test_delete_after_close),
		/* lookup */
		TEST(test_lookup_after_insert),
		TEST(test_lookup_after_delete),
		TEST(test_lookup_32_bit_value),
		/* update */
		TEST(test_update_existing),
		/* races with insert/delete */
		TEST(test_destroy_orphan_child, SOCK_STREAM),
		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
		TEST(test_race_insert_listen, SOCK_STREAM),
		/* child clone */
		TEST(test_clone_after_delete, SOCK_STREAM),
		TEST(test_accept_after_delete, SOCK_STREAM),
		TEST(test_accept_before_delete, SOCK_STREAM),
	};
	const char *family_name, *map_name, *sotype_name;
	const struct op_test *t;
	char s[MAX_TEST_NAME];
	int map_fd;

	family_name = family_str(family);
	map_name = map_type_str(map);
	sotype_name = sotype_str(sotype);
	map_fd = bpf_map__fd(map);

	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
			 sotype_name, t->name);

		/* Skip tests restricted to a different socket type */
		if (t->sotype != 0 && t->sotype != sotype)
			continue;

		if (!test__start_subtest(s))
			continue;

		t->fn(family, sotype, map_fd);
		/* Leave the map empty for the next subtest */
		test_ops_cleanup(map);
	}
}

/*
 * Run the sk_skb/sk_msg redirect tests. Unlike test_ops(), the skeleton
 * and map objects are passed through so tests can attach programs.
 */
static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
		       int family, int sotype)
{
	const struct redir_test {
		void (*fn)(struct test_sockmap_listen *skel,
			   struct bpf_map *map, int family, int sotype);
		const char *name;
	} tests[] = {
		TEST(test_skb_redir_to_connected),
		TEST(test_skb_redir_to_listening),
		TEST(test_msg_redir_to_connected),
		TEST(test_msg_redir_to_listening),
	};
	const char *family_name, *map_name;
	const struct redir_test *t;
	char s[MAX_TEST_NAME];

	family_name = family_str(family);
	map_name = map_type_str(map);

	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
			 t->name);

		if (!test__start_subtest(s))
			continue;

		t->fn(skel, map, family, sotype);
	}
}

/*
 * Redirect between two AF_UNIX socket pairs: one byte written to c1 is
 * redirected by the verdict program and read back on p0 (ingress mode)
 * or on p0's peer c0 (egress mode).
 */
static void unix_redir_to_connected(int sotype,
 int sock_mapfd,
				    int verd_mapfd, enum redir_mode mode)
{
	const char *log_prefix = redir_mode_str(mode);
	int c0, c1, p0, p1;
	unsigned int pass;
	int err, n;
	int sfd[2];
	u32 key;
	char b;

	zero_verdict_count(verd_mapfd);

	/* First pair: c0 <-> p0 */
	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
		return;
	c0 = sfd[0], p0 = sfd[1];

	/* Second pair: c1 <-> p1 */
	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
		goto close0;
	c1 = sfd[0], p1 = sfd[1];

	/* sock_map[0] = p0, sock_map[1] = p1 */
	err = add_to_sockmap(sock_mapfd, p0, p1);
	if (err)
		goto close;

	/* One byte into c1; the verdict program redirects it via the map */
	n = write(c1, "a", 1);
	if (n < 0)
		FAIL_ERRNO("%s: write", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete write", log_prefix);
	/* FAIL() records the error but does not abort; bail out manually */
	if (n < 1)
		goto close;

	key = SK_PASS;
	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
	if (err)
		goto close;
	if (pass != 1)
		FAIL("%s: want pass count 1, have %d", log_prefix, pass);

	/* Ingress redirect lands on p0 itself; egress on p0's peer c0 */
	n = recv_timeout(mode == REDIR_INGRESS ?
 p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
	if (n < 0)
		FAIL_ERRNO("%s: recv_timeout", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete recv", log_prefix);

close:
	xclose(c1);
	xclose(p1);
close0:
	xclose(c0);
	xclose(p0);
}

/*
 * Attach the skb verdict program to the map and run the AF_UNIX
 * redirect scenario in both egress and ingress mode.
 */
static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
					struct bpf_map *inner_map, int sotype)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	/* test_ingress tells the BPF program which direction to redirect */
	skel->bss->test_ingress = false;
	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
	skel->bss->test_ingress = true;
	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}

/* Subtest wrapper: AF_UNIX redirect for the given socket type */
static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
			    int sotype)
{
	const char *family_name, *map_name;
	char s[MAX_TEST_NAME];

	family_name = family_str(AF_UNIX);
	map_name = map_type_str(map);
	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
	if (!test__start_subtest(s))
		return;
	unix_skb_redir_to_connected(skel, map, sotype);
}

/*
 * Run the table of reuseport (bpf_sk_select_reuseport) tests against
 * @map. Entries with a non-zero .sotype only run for that socket type.
 */
static void test_reuseport(struct test_sockmap_listen *skel,
			   struct bpf_map *map, int family, int sotype)
{
	const struct reuseport_test {
		void (*fn)(int family, int sotype, int socket_map,
			   int verdict_map, int reuseport_prog);
		const char *name;
		int sotype; /* 0 = run for any socket type */
	} tests[] = {
		TEST(test_reuseport_select_listening),
		TEST(test_reuseport_select_connected),
		TEST(test_reuseport_mixed_groups),
	};
	int socket_map, verdict_map, reuseport_prog;
	const char *family_name, *map_name,
 *sotype_name;
	const struct reuseport_test *t;
	char s[MAX_TEST_NAME];

	family_name = family_str(family);
	map_name = map_type_str(map);
	sotype_name = sotype_str(sotype);

	socket_map = bpf_map__fd(map);
	verdict_map = bpf_map__fd(skel->maps.verdict_map);
	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);

	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
			 sotype_name, t->name);

		/* Skip tests restricted to a different socket type */
		if (t->sotype != 0 && t->sotype != sotype)
			continue;

		if (!test__start_subtest(s))
			continue;

		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
	}
}

/*
 * Create a pair of mutually connected, non-blocking inet sockets over
 * loopback. On success *s receives the loopback-bound "server" socket
 * and *c the client socket. Returns 0 on success; on failure returns a
 * non-zero error with any partially-created sockets closed.
 * Callers only pass type == SOCK_DGRAM here.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int p0, c0;
	int err;

	p0 = socket_loopback(family, type | SOCK_NONBLOCK);
	if (p0 < 0)
		return p0;

	len = sizeof(addr);
	err = xgetsockname(p0, sockaddr(&addr), &len);
	if (err)
		goto close_peer0;

	c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
	if (c0 < 0) {
		err = c0;
		goto close_peer0;
	}
	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;
	err = xgetsockname(c0, sockaddr(&addr), &len);
	if (err)
		goto close_cli0;
	/* Connect the server side back to the client's address as well */
	err = xconnect(p0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	*s = p0;
	*c = c0;
	return 0;

close_cli0:
	xclose(c0);
close_peer0:
	xclose(p0);
	return err;
}

/*
 * Redirect between two connected UDP socket pairs: one byte written to
 * c1 is redirected by the verdict program and read back on p0 (ingress
 * mode) or on p0's peer c0 (egress mode).
 */
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
				   enum redir_mode mode)
{
	const char *log_prefix = redir_mode_str(mode);
	int c0, c1, p0, p1;
	unsigned int pass;
	int err, n;
	u32 key;
	char b;

	zero_verdict_count(verd_mapfd);

	err = inet_socketpair(family, SOCK_DGRAM, &p0,
 &c0);
	if (err)
		return;
	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
	if (err)
		goto close_cli0;

	/* sock_map[0] = p0, sock_map[1] = p1 */
	err = add_to_sockmap(sock_mapfd, p0, p1);
	if (err)
		goto close_cli1;

	n = write(c1, "a", 1);
	if (n < 0)
		FAIL_ERRNO("%s: write", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete write", log_prefix);
	/* FAIL() records the error but does not abort; bail out manually */
	if (n < 1)
		goto close_cli1;

	key = SK_PASS;
	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
	if (err)
		goto close_cli1;
	if (pass != 1)
		FAIL("%s: want pass count 1, have %d", log_prefix, pass);

	/* Ingress redirect lands on p0; egress on p0's peer c0 */
	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
	if (n < 0)
		FAIL_ERRNO("%s: recv_timeout", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete recv", log_prefix);

close_cli1:
	xclose(c1);
	xclose(p1);
close_cli0:
	xclose(c0);
	xclose(p0);
}

/* Attach the skb verdict program and run the UDP redirect both ways */
static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
				       struct bpf_map *inner_map, int family)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	/* test_ingress tells the BPF program which direction to redirect */
	skel->bss->test_ingress = false;
	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
	skel->bss->test_ingress = true;
	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}

/* Subtest wrapper: UDP-to-UDP redirect for the given address family */
static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
			   int family)
{
	const char *family_name, *map_name;
	char s[MAX_TEST_NAME];

	family_name = family_str(family);
	map_name = map_type_str(map);
	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
__func__); 1812 if (!test__start_subtest(s)) 1813 return; 1814 udp_skb_redir_to_connected(skel, map, family); 1815 } 1816 1817 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, 1818 int verd_mapfd, enum redir_mode mode) 1819 { 1820 const char *log_prefix = redir_mode_str(mode); 1821 int c0, c1, p0, p1; 1822 unsigned int pass; 1823 int err, n; 1824 int sfd[2]; 1825 u32 key; 1826 char b; 1827 1828 zero_verdict_count(verd_mapfd); 1829 1830 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) 1831 return; 1832 c0 = sfd[0], p0 = sfd[1]; 1833 1834 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); 1835 if (err) 1836 goto close; 1837 1838 err = add_to_sockmap(sock_mapfd, p0, p1); 1839 if (err) 1840 goto close_cli1; 1841 1842 n = write(c1, "a", 1); 1843 if (n < 0) 1844 FAIL_ERRNO("%s: write", log_prefix); 1845 if (n == 0) 1846 FAIL("%s: incomplete write", log_prefix); 1847 if (n < 1) 1848 goto close_cli1; 1849 1850 key = SK_PASS; 1851 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 1852 if (err) 1853 goto close_cli1; 1854 if (pass != 1) 1855 FAIL("%s: want pass count 1, have %d", log_prefix, pass); 1856 1857 n = recv_timeout(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); 1858 if (n < 0) 1859 FAIL_ERRNO("%s: recv_timeout", log_prefix); 1860 if (n == 0) 1861 FAIL("%s: incomplete recv", log_prefix); 1862 1863 close_cli1: 1864 xclose(c1); 1865 xclose(p1); 1866 close: 1867 xclose(c0); 1868 xclose(p0); 1869 } 1870 1871 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, 1872 struct bpf_map *inner_map, int family) 1873 { 1874 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1875 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1876 int sock_map = bpf_map__fd(inner_map); 1877 int err; 1878 1879 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1880 if (err) 1881 return; 1882 1883 skel->bss->test_ingress = false; 1884 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, 1885 REDIR_EGRESS); 1886 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, 1887 REDIR_EGRESS); 1888 skel->bss->test_ingress = true; 1889 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, 1890 REDIR_INGRESS); 1891 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, 1892 REDIR_INGRESS); 1893 1894 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1895 } 1896 1897 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, 1898 int verd_mapfd, enum redir_mode mode) 1899 { 1900 const char *log_prefix = redir_mode_str(mode); 1901 int c0, c1, p0, p1; 1902 unsigned int pass; 1903 int err, n; 1904 int sfd[2]; 1905 u32 key; 1906 char b; 1907 1908 zero_verdict_count(verd_mapfd); 1909 1910 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); 1911 if (err) 1912 return; 1913 1914 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) 1915 goto close_cli0; 1916 c1 = sfd[0], p1 = sfd[1]; 1917 1918 err = add_to_sockmap(sock_mapfd, p0, p1); 1919 if (err) 1920 goto close; 1921 1922 n = write(c1, "a", 1); 1923 if (n < 0) 1924 FAIL_ERRNO("%s: write", log_prefix); 1925 if (n == 0) 1926 
		FAIL("%s: incomplete write", log_prefix);
	/* FAIL() records the error but does not abort; bail out manually */
	if (n < 1)
		goto close;

	key = SK_PASS;
	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
	if (err)
		goto close;
	if (pass != 1)
		FAIL("%s: want pass count 1, have %d", log_prefix, pass);

	/* Ingress redirect lands on p0; egress on p0's peer c0 */
	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
	if (n < 0)
		FAIL_ERRNO("%s: recv_timeout", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete recv", log_prefix);

close:
	xclose(c1);
	xclose(p1);
close_cli0:
	xclose(c0);
	xclose(p0);
}

/* Attach skb verdict prog; unix->inet redirect, both directions/types */
static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
					     struct bpf_map *inner_map, int family)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	/* test_ingress tells the BPF program which direction to redirect */
	skel->bss->test_ingress = false;
	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
				     REDIR_EGRESS);
	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
				     REDIR_EGRESS);
	skel->bss->test_ingress = true;
	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
				     REDIR_INGRESS);
	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
				     REDIR_INGRESS);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}

/* Subtest wrapper: inet<->unix redirects for the given address family */
static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
				int family)
{
	const char *family_name, *map_name;
	char s[MAX_TEST_NAME];

	family_name = family_str(family);
	map_name = map_type_str(map);
	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
	if (!test__start_subtest(s))
		return;
	inet_unix_skb_redir_to_connected(skel, map, family);
	unix_inet_skb_redir_to_connected(skel, map, family);
}

/* Full test matrix for one map object and inet address family */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	test_ops(skel, map, family, SOCK_STREAM);
	test_ops(skel, map, family, SOCK_DGRAM);
	/* Redirect and UDP tests pick their own socket types below */
	test_redir(skel, map, family, SOCK_STREAM);
	test_reuseport(skel, map, family, SOCK_STREAM);
	test_reuseport(skel, map, family, SOCK_DGRAM);
	test_udp_redir(skel, map, family);
	test_udp_unix_redir(skel, map, family);
}

/*
 * Test entry point (serial_ prefix: test_progs runs it without parallel
 * subtests). Exercises both SOCKMAP and SOCKHASH over IPv4, IPv6 and
 * AF_UNIX.
 */
void serial_test_sockmap_listen(void)
{
	struct test_sockmap_listen *skel;

	skel = test_sockmap_listen__open_and_load();
	if (!skel) {
		FAIL("skeleton open/load failed");
		return;
	}

	/* test_sockmap = true: BPF programs operate on the SOCKMAP */
	skel->bss->test_sockmap = true;
	run_tests(skel, skel->maps.sock_map, AF_INET);
	run_tests(skel, skel->maps.sock_map, AF_INET6);
	test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
	test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);

	/* test_sockmap = false: BPF programs operate on the SOCKHASH */
	skel->bss->test_sockmap = false;
	run_tests(skel, skel->maps.sock_hash, AF_INET);
	run_tests(skel, skel->maps.sock_hash, AF_INET6);
	test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
	test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);

	test_sockmap_listen__destroy(skel);
}