// SPDX-License-Identifier: GPL-2.0

/*
 * mptcp_connect - MPTCP/TCP test client and server.
 *
 * In client mode the tool connects to connect_address and copies its input
 * (stdin or the -i file) to the peer while copying whatever the peer sends to
 * stdout. In listen mode (-l) it accepts connections and does the same.
 * The transfer can be driven by poll(), mmap()+write() or sendfile().
 */

#define _GNU_SOURCE

#include <errno.h>
#include <limits.h>
#include <fcntl.h>
#include <string.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <signal.h>
#include <unistd.h>
#include <time.h>

#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/mman.h>

#include <netdb.h>
#include <netinet/in.h>

#include <linux/tcp.h>
#include <linux/time_types.h>
#include <linux/sockios.h>

extern int optind;

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
#ifndef TCP_ULP
#define TCP_ULP 31
#endif

static int poll_timeout = 10 * 1000;
static bool listen_mode;
/* Written from a signal handler, hence volatile sig_atomic_t. */
static volatile sig_atomic_t quit;

enum cfg_mode {
	CFG_MODE_POLL,
	CFG_MODE_MMAP,
	CFG_MODE_SENDFILE,
};

enum cfg_peek {
	CFG_NONE_PEEK,
	CFG_WITH_PEEK,
	CFG_AFTER_PEEK,
};

static enum cfg_mode cfg_mode = CFG_MODE_POLL;
static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
static const char *cfg_host;
static const char *cfg_port	= "12000";
static int cfg_sock_proto	= IPPROTO_MPTCP;
static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
static bool cfg_remove;
static unsigned int cfg_time;
static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
static char *cfg_input;
static int cfg_repeat = 1;
static int cfg_truncate;
static int cfg_rcv_trunc;

/* cmsg types requested with -c */
struct cfg_cmsg_types {
	unsigned int cmsg_enabled:1;
	unsigned int timestampns:1;
	unsigned int tcp_inq:1;
};

/* socket options requested with -o */
struct cfg_sockopt_types {
	unsigned int transparent:1;
	unsigned int mptfo:1;
};

/* state used to validate the TCP_CM_INQ hints across recvmsg() calls */
struct tcp_inq_state {
	unsigned int last;
	bool expect_eof;
};

/*
 * Pending output: 'len' bytes starting at buf[off] still have to be sent to
 * the peer; 'total_len' is the amount of input read before the connection was
 * established (MPTFO payload).
 */
struct wstate {
	char buf[8192];
	unsigned int len;
	unsigned int off;
	unsigned int total_len;
};

static struct tcp_inq_state tcp_inq;

static struct cfg_cmsg_types cfg_cmsg_types;
static struct cfg_sockopt_types cfg_sockopt_types;

/* Print the command line help on stderr and exit with status 1. */
static void die_usage(void)
{
	fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] "
		"[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] "
		"[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n");
	fprintf(stderr, "\t-6 use ipv6\n");
	fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
	fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount "
		"of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP "
		"fastclose at close/shutdown. If offset is negative, expect the peer to close before "
		"all the local data as been sent, thus toleration errors on write and EPIPE signals\n");
	fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin\n");
	fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
		"incoming connections, in client mode, disconnect and reconnect to the server\n");
	fprintf(stderr, "\t-j     -- add additional sleep at connection start and tear down "
		"-- for MPJ tests\n");
	fprintf(stderr, "\t-l     -- listens mode, accepts incoming connection\n");
	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
	fprintf(stderr, "\t-o option -- test sockopt <option>\n");
	fprintf(stderr, "\t-p num -- use port num\n");
	fprintf(stderr,
		"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
	fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
	fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
		"-- for remove addr tests\n");
	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
	fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
	exit(1);
}

/* printf-like fatal error: print the message on stderr and exit(1). */
static void xerror(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	exit(1);
}

/* Signal handler: only records that a signal was received. */
static void handle_signal(int nr)
{
	quit = 1;
}

/* Map a getaddrinfo()/getnameinfo() error code to a printable string. */
static const char *getxinfo_strerr(int err)
{
	if (err == EAI_SYSTEM)
		return strerror(errno);

	return gai_strerror(err);
}

/* getnameinfo() in numeric host/service form; exits on failure. */
static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
			 char *host, socklen_t hostlen,
			 char *serv, socklen_t servlen)
{
	int flags = NI_NUMERICHOST | NI_NUMERICSERV;
	int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen,
			      flags);

	if (err) {
		const char *errstr = getxinfo_strerr(err);

		fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr);
		exit(1);
	}
}

/* getaddrinfo() wrapper; exits on failure. The caller frees *res. */
static void xgetaddrinfo(const char *node, const char *service,
			 const struct addrinfo *hints,
			 struct addrinfo **res)
{
	int err = getaddrinfo(node, service, hints, res);

	if (err) {
		const char *errstr = getxinfo_strerr(err);

		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
			node ? node : "", service ? service : "", errstr);
		exit(1);
	}
}

/* Set SO_RCVBUF on fd; exits on failure. */
static void set_rcvbuf(int fd, unsigned int size)
{
	int err;

	err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));
	if (err) {
		perror("set SO_RCVBUF");
		exit(1);
	}
}

/* Set SO_SNDBUF on fd; exits on failure. */
static void set_sndbuf(int fd, unsigned int size)
{
	int err;

	err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size));
	if (err) {
		perror("set SO_SNDBUF");
		exit(1);
	}
}

/* Set SO_MARK on fd; exits on failure. */
static void set_mark(int fd, uint32_t mark)
{
	int err;

	err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
	if (err) {
		perror("set SO_MARK");
		exit(1);
	}
}

/* Best effort: enable IP(V6)_TRANSPARENT, only reporting failures. */
static void set_transparent(int fd, int pf)
{
	int one = 1;

	switch (pf) {
	case AF_INET:
		if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)))
			perror("IP_TRANSPARENT");
		break;
	case AF_INET6:
		if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)))
			perror("IPV6_TRANSPARENT");
		break;
	}
}

/* Best effort: enable TCP_FASTOPEN on a listener, only reporting failures. */
static void set_mptfo(int fd, int pf)
{
	int qlen = 25;

	if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1)
		perror("TCP_FASTOPEN");
}

/* Try to attach the ULP called 'name' to sock; returns setsockopt()'s result. */
static int do_ulp_so(int sock, const char *name)
{
	return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
}

#define X(m)	xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line)
/*
 * Sanity check the TCP_ULP socket option on 'sock': the only ULP that may be
 * reported is "mptcp", and attaching "tls" or "mptcp" from user space is
 * expected to fail. Any unexpected result is fatal.
 */
static void sock_test_tcpulp(int sock, int proto, unsigned int line)
{
	socklen_t buflen = 8;
	char buf[8] = "";
	int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen);

	if (ret != 0)
		X("getsockopt");

	if (buflen > 0) {
		if (strcmp(buf, "mptcp") != 0)
			xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line);
		ret = do_ulp_so(sock, "tls");
		if (ret == 0)
			X("setsockopt");
	} else if (proto == IPPROTO_MPTCP) {
		ret = do_ulp_so(sock, "tls");
		if (ret != -1)
			X("setsockopt");
	}

	ret = do_ulp_so(sock, "mptcp");
	if (ret != -1)
		X("setsockopt");

#undef X
}

#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__)

/*
 * Create a listening socket bound to listenaddr:port using cfg_sock_proto.
 * Returns the socket, or -1 on failure.
 */
static int sock_listen_mptcp(const char * const listenaddr,
			     const char * const port)
{
	int sock = -1;
	struct addrinfo hints = {
		.ai_protocol = IPPROTO_TCP,
		.ai_socktype = SOCK_STREAM,
		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
	};
	struct addrinfo *a, *addr;
	int one = 1;

	hints.ai_family = pf;

	xgetaddrinfo(listenaddr, port, &hints, &addr);

	for (a = addr; a; a = a->ai_next) {
		sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto);
		if (sock < 0)
			continue;

		SOCK_TEST_TCPULP(sock, cfg_sock_proto);

		if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
				     sizeof(one)))
			perror("setsockopt");

		if (cfg_sockopt_types.transparent)
			set_transparent(sock, pf);

		if (cfg_sockopt_types.mptfo)
			set_mptfo(sock, pf);

		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
			break; /* success */

		perror("bind");
		close(sock);
		sock = -1;
	}

	freeaddrinfo(addr);

	if (sock < 0) {
		fprintf(stderr, "Could not create listen socket\n");
		return sock;
	}

	SOCK_TEST_TCPULP(sock, cfg_sock_proto);

	if (listen(sock, 20)) {
		perror("listen");
		close(sock);
		return -1;
	}

	SOCK_TEST_TCPULP(sock, cfg_sock_proto);

	return sock;
}

/*
 * Copy the address held by 'ai' into caller-owned storage, so that it stays
 * valid after the getaddrinfo() result list has been released.
 */
static void save_peer_addr(const struct addrinfo *ai,
			   struct sockaddr_storage *peer_addr,
			   socklen_t *peer_addrlen)
{
	if (ai->ai_addrlen > sizeof(*peer_addr))
		xerror("peer address too large: %u", (unsigned int)ai->ai_addrlen);

	memset(peer_addr, 0, sizeof(*peer_addr));
	memcpy(peer_addr, ai->ai_addr, ai->ai_addrlen);
	*peer_addrlen = ai->ai_addrlen;
}

/*
 * Connect to remoteaddr:port with the given protocol.
 *
 * With the MPTFO option the first chunk of 'infd' is read into 'winfo' and
 * sent together with the SYN via sendto(MSG_FASTOPEN); 'winfo' is updated to
 * describe the bytes still to be sent.
 *
 * On success the connected socket is returned and the peer address is stored
 * in *peer_addr / *peer_addrlen. Returns -1 if no address could be reached.
 */
static int sock_connect_mptcp(const char * const remoteaddr,
			      const char * const port, int proto,
			      struct sockaddr_storage *peer_addr,
			      socklen_t *peer_addrlen,
			      int infd, struct wstate *winfo)
{
	struct addrinfo hints = {
		.ai_protocol = IPPROTO_TCP,
		.ai_socktype = SOCK_STREAM,
	};
	struct addrinfo *a, *addr;
	int sock = -1;

	hints.ai_family = pf;

	xgetaddrinfo(remoteaddr, port, &hints, &addr);
	for (a = addr; a; a = a->ai_next) {
		sock = socket(a->ai_family, a->ai_socktype, proto);
		if (sock < 0) {
			perror("socket");
			continue;
		}

		SOCK_TEST_TCPULP(sock, proto);

		if (cfg_mark)
			set_mark(sock, cfg_mark);

		if (cfg_sockopt_types.mptfo) {
			ssize_t syn_copied;

			if (!winfo->total_len) {
				ssize_t rd = read(infd, winfo->buf,
						  sizeof(winfo->buf));

				/* never store a negative count in the
				 * unsigned wstate fields
				 */
				if (rd < 0)
					xerror("can't read the fastopen data: %d", errno);
				winfo->total_len = winfo->len = rd;
			}

			syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN,
					    a->ai_addr, a->ai_addrlen);
			if (syn_copied >= 0) {
				winfo->off = syn_copied;
				winfo->len -= syn_copied;
				save_peer_addr(a, peer_addr, peer_addrlen);
				break; /* success */
			}
			perror("sendto()");
		} else {
			if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
				save_peer_addr(a, peer_addr, peer_addrlen);
				break; /* success */
			}
			perror("connect()");
		}

		close(sock);
		sock = -1;
	}

	freeaddrinfo(addr);
	if (sock != -1)
		SOCK_TEST_TCPULP(sock, proto);
	return sock;
}

/*
 * Write a random-sized chunk (at most 'len' bytes) of 'buf' to 'fd'.
 * The chunk is further limited for the join (-j) and remove (-r) tests, which
 * also sleep after the write.
 * Returns the number of bytes written, or a negative value on error.
 */
static ssize_t do_rnd_write(const int fd, char *buf, const size_t len)
{
	static bool first = true;
	unsigned int do_w;
	ssize_t bw;

	do_w = rand() & 0xffff;
	if (do_w == 0 || do_w > len)
		do_w = len;

	if (cfg_join && first && do_w > 100)
		do_w = 100;

	if (cfg_remove && do_w > cfg_do_w)
		do_w = cfg_do_w;

	bw = write(fd, buf, do_w);
	if (bw < 0)
		return bw;

	/* let the join handshake complete, before going on */
	if (cfg_join && first) {
		usleep(200000);
		first = false;
	}

	if (cfg_remove)
		usleep(200000);

	return bw;
}

/*
 * Write all 'len' bytes of 'buf' to 'fd'.
 * Returns 'len' on success, 0 if a write failed.
 */
static size_t do_write(const int fd, char *buf, const size_t len)
{
	size_t offset = 0;

	while (offset < len) {
		size_t written;
		ssize_t bw;

		bw = write(fd, buf + offset, len - offset);
		if (bw < 0) {
			perror("write");
			return 0;
		}

		written = (size_t)bw;
		offset += written;
	}

	return offset;
}

/*
 * Walk the control messages of 'msgh' and check that every cmsg type
 * requested with -c is present; record the TCP_CM_INQ hint for later checks.
 */
static void process_cmsg(struct msghdr *msgh)
{
	struct __kernel_timespec ts;
	bool inq_found = false;
	bool ts_found = false;
	unsigned int inq = 0;
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
			memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
			ts_found = true;
			continue;
		}
		if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
			memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq));
			inq_found = true;
			continue;
		}

	}

	if (cfg_cmsg_types.timestampns) {
		if (!ts_found)
			xerror("TIMESTAMPNS not present\n");
	}

	if (cfg_cmsg_types.tcp_inq) {
		if (!inq_found)
			xerror("TCP_INQ not present\n");

		if (inq > 1024)
			xerror("tcp_inq %u is larger than one kbyte\n", inq);
		tcp_inq.last = inq;
	}
}

/*
 * recvmsg() up to 'len' bytes into 'buf' and validate the received control
 * messages against the -c configuration. Inconsistencies are fatal.
 * Returns the recvmsg() result.
 */
static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
{
	char msg_buf[8192];
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = len,
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = msg_buf,
		.msg_controllen = sizeof(msg_buf),
	};
	int flags = 0;
	unsigned int last_hint = tcp_inq.last;
	int ret = recvmsg(fd, &msg, flags);

	if (ret <= 0) {
		if (ret == 0 && tcp_inq.expect_eof)
			return ret;

		if (ret == 0 && cfg_cmsg_types.tcp_inq)
			if (last_hint != 1 && last_hint != 0)
				xerror("EOF but last tcp_inq hint was %u\n", last_hint);

		return ret;
	}

	if (tcp_inq.expect_eof)
		xerror("expected EOF, last_hint %u, now %u\n",
		       last_hint, tcp_inq.last);

	if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
		xerror("got %lu bytes of cmsg data, expected 0\n",
		       (unsigned long)msg.msg_controllen);

	if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
		xerror("%s\n", "got no cmsg data");

	if (msg.msg_controllen)
		process_cmsg(&msg);

	if (cfg_cmsg_types.tcp_inq) {
		if ((size_t)ret < len && last_hint > (unsigned int)ret) {
			if (ret + 1 != (int)last_hint) {
				int next = read(fd, msg_buf, sizeof(msg_buf));

				xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n",
				       ret, (unsigned int)len, last_hint, tcp_inq.last, next);
			} else {
				tcp_inq.expect_eof = true;
			}
		}
	}

	return ret;
}

/*
 * Read a random-sized chunk (at most 'len' bytes) from 'fd' into 'buf', using
 * the access method selected by -P / -c.
 * Returns the number of bytes read, 0 on EOF or a negative value on error.
 */
static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
{
	int ret = 0;
	char tmp[16384];
	size_t cap = rand();

	cap &= 0xffff;

	if (cap == 0)
		cap = 1;
	else if (cap > len)
		cap = len;

	if (cfg_peek == CFG_WITH_PEEK) {
		/* keep the peeked data, drop the same amount from the queue */
		ret = recv(fd, buf, cap, MSG_PEEK);
		ret = (ret < 0) ? ret : read(fd, tmp, ret);
	} else if (cfg_peek == CFG_AFTER_PEEK) {
		ret = recv(fd, buf, cap, MSG_PEEK);
		ret = (ret < 0) ? ret : read(fd, buf, cap);
	} else if (cfg_cmsg_types.cmsg_enabled) {
		ret = do_recvmsg_cmsg(fd, buf, cap);
	} else {
		ret = read(fd, buf, cap);
	}

	return ret;
}

/* Best effort: switch O_NONBLOCK on or off for fd. */
static void set_nonblock(int fd, bool nonblock)
{
	int flags = fcntl(fd, F_GETFL);

	if (flags == -1)
		return;

	if (nonblock)
		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
	else
		fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}

static void shut_wr(int fd)
{
	/* Close our write side, ev. give some time
	 * for address notification and/or checking
	 * the current status
	 */
	if (cfg_wait)
		usleep(cfg_wait);

	shutdown(fd, SHUT_WR);
}

/*
 * poll()-driven transfer: data read from 'peerfd' is written to 'outfd', data
 * read from 'infd' is written to 'peerfd'.
 * '*in_closed_after_out' is set when the peer's EOF is seen after this end
 * has already finished sending.
 * Returns 0 on success, a positive error code otherwise.
 */
static int copyfd_io_poll(int infd, int peerfd, int outfd,
			  bool *in_closed_after_out, struct wstate *winfo)
{
	struct pollfd fds = {
		.fd = peerfd,
		.events = POLLIN | POLLOUT,
	};
	unsigned int total_wlen = 0, total_rlen = 0;

	set_nonblock(peerfd, true);

	for (;;) {
		char rbuf[8192];
		ssize_t len;

		if (fds.events == 0)
			break;

		switch (poll(&fds, 1, poll_timeout)) {
		case -1:
			if (errno == EINTR)
				continue;
			perror("poll");
			return 1;
		case 0:
			fprintf(stderr, "%s: poll timed out (events: "
				"POLLIN %u, POLLOUT %u)\n", __func__,
				fds.events & POLLIN, fds.events & POLLOUT);
			return 2;
		}

		if (fds.revents & POLLIN) {
			ssize_t rb = sizeof(rbuf);

			/* limit the total amount of read data to the trunc value*/
			if (cfg_truncate > 0) {
				if (rb + total_rlen > cfg_truncate)
					rb = cfg_truncate - total_rlen;
				len = read(peerfd, rbuf, rb);
			} else {
				len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
			}
			if (len == 0) {
				/* no more data to receive:
				 * peer has closed its write side
				 */
				fds.events &= ~POLLIN;

				if ((fds.events & POLLOUT) == 0) {
					*in_closed_after_out = true;
					/* and nothing more to send */
					break;
				}

			/* Else, still have data to transmit */
			} else if (len < 0) {
				if (cfg_rcv_trunc)
					return 0;
				perror("read");
				return 3;
			}

			total_rlen += len;
			do_write(outfd, rbuf, len);
		}

		if (fds.revents & POLLOUT) {
			if (winfo->len == 0) {
				ssize_t rd = read(infd, winfo->buf,
						  sizeof(winfo->buf));

				/* check the result before storing it in the
				 * unsigned wstate field: a failure must not
				 * look like a huge amount of pending data
				 */
				if (rd < 0) {
					if (errno == EINTR)
						continue;
					perror("read");
					return 4;
				}

				winfo->off = 0;
				winfo->len = rd;
			}

			if (winfo->len > 0) {
				ssize_t bw;

				/* limit the total amount of written data to the trunc value */
				if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate)
					winfo->len = cfg_truncate - total_wlen;

				bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
				if (bw < 0) {
					if (cfg_rcv_trunc)
						return 0;
					perror("write");
					return 111;
				}

				winfo->off += bw;
				winfo->len -= bw;
				total_wlen += bw;
			} else {
				/* We have no more data to send. */
				fds.events &= ~POLLOUT;

				if ((fds.events & POLLIN) == 0)
					/* ... and peer also closed already */
					break;

				shut_wr(peerfd);
			}
		}

		if (fds.revents & (POLLERR | POLLNVAL)) {
			if (cfg_rcv_trunc)
				return 0;
			fprintf(stderr, "Unexpected revents: "
				"POLLERR/POLLNVAL(%x)\n", fds.revents);
			return 5;
		}

		if (cfg_truncate > 0 && total_wlen >= cfg_truncate &&
		    total_rlen >= cfg_truncate)
			break;
	}

	/* leave some time for late join/announce */
	if (cfg_remove)
		usleep(cfg_wait);

	return 0;
}

/*
 * Copy everything readable from 'infd' to 'outfd'.
 * Returns the last read result: 0 on EOF, negative on read error, positive if
 * a write to 'outfd' was short or failed.
 */
static int do_recvfile(int infd, int outfd)
{
	ssize_t r;

	do {
		char buf[16384];

		r = do_rnd_read(infd, buf, sizeof(buf));
		if (r > 0) {
			if (write(outfd, buf, r) != r)
				break;
		} else if (r < 0) {
			perror("read");
		}
	} while (r > 0);

	return (int)r;
}

/*
 * Flush to 'fd' the data still pending in 'winfo'.
 * Returns 0 on success, 4 on write error.
 */
static int spool_buf(int fd, struct wstate *winfo)
{
	while (winfo->len) {
		int ret = write(fd, winfo->buf + winfo->off, winfo->len);

		if (ret < 0) {
			perror("write");
			return 4;
		}
		winfo->off += ret;
		winfo->len -= ret;
	}
	return 0;
}

/*
 * Send the 'size' bytes of the file 'infd' to 'outfd' through a read-only
 * mapping, after flushing the data already buffered in 'winfo'.
 * Returns 0 on success, non-zero on failure.
 */
static int do_mmap(int infd, int outfd, unsigned int size,
		   struct wstate *winfo)
{
	char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
	ssize_t ret = 0, off = winfo->total_len;
	size_t rem;

	if (inbuf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* spool_buf() reports failures with a positive code */
	ret = spool_buf(outfd, winfo);
	if (ret) {
		munmap(inbuf, size);
		return ret;
	}

	rem = size - winfo->total_len;

	while (rem > 0) {
		ret = write(outfd, inbuf + off, rem);

		if (ret < 0) {
			perror("write");
			break;
		}

		off += ret;
		rem -= ret;
	}

	munmap(inbuf, size);
	return rem;
}

/*
 * Return the size of the regular file 'fd', or a negative value if it can't
 * be determined, 'fd' is not a regular file or the size exceeds INT_MAX.
 */
static int get_infd_size(int fd)
{
	struct stat sb;
	ssize_t count;
	int err;

	err = fstat(fd, &sb);
	if (err < 0) {
		perror("fstat");
		return -1;
	}

	if ((sb.st_mode & S_IFMT) != S_IFREG) {
		fprintf(stderr, "%s: stdin is not a regular file\n", __func__);
		return -2;
	}

	count = sb.st_size;
	if (count > INT_MAX) {
		fprintf(stderr, "File too large: %zd\n", count);
		return -3;
	}

	return (int)count;
}

/*
 * Send 'count' bytes of the file 'infd' to 'outfd' with sendfile(), after
 * flushing the data already buffered in 'winfo'.
 * Returns 0 on success, non-zero on failure.
 */
static int do_sendfile(int infd, int outfd, unsigned int count,
		       struct wstate *winfo)
{
	/* spool_buf() reports failures with a positive code */
	int ret = spool_buf(outfd, winfo);

	if (ret)
		return ret;

	count -= winfo->total_len;

	while (count > 0) {
		ssize_t r;

		r = sendfile(outfd, infd, NULL, count);
		if (r < 0) {
			perror("sendfile");
			return 3;
		}

		count -= r;
	}

	return 0;
}

/*
 * mmap mode: the listener first receives then sends, the client first sends,
 * shuts down its write side and then receives.
 */
static int copyfd_io_mmap(int infd, int peerfd, int outfd,
			  unsigned int size, bool *in_closed_after_out,
			  struct wstate *winfo)
{
	int err;

	if (listen_mode) {
		err = do_recvfile(peerfd, outfd);
		if (err)
			return err;

		err = do_mmap(infd, peerfd, size, winfo);
	} else {
		err = do_mmap(infd, peerfd, size, winfo);
		if (err)
			return err;

		shut_wr(peerfd);

		err = do_recvfile(peerfd, outfd);
		*in_closed_after_out = true;
	}

	return err;
}

/* sendfile mode: same ordering as copyfd_io_mmap(), using sendfile(). */
static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
			      unsigned int size, bool *in_closed_after_out, struct wstate *winfo)
{
	int err;

	if (listen_mode) {
		err = do_recvfile(peerfd, outfd);
		if (err)
			return err;

		err = do_sendfile(infd, peerfd, size, winfo);
	} else {
		err = do_sendfile(infd, peerfd, size, winfo);
		if (err)
			return err;

		shut_wr(peerfd);

		err = do_recvfile(peerfd, outfd);
		*in_closed_after_out = true;
	}

	return err;
}

/*
 * Run one transfer between infd/outfd and the peer using the configured mode,
 * optionally closing 'peerfd' afterwards and enforcing the -T runtime limit.
 * Returns 0 on success, non-zero on failure.
 */
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{
	bool in_closed_after_out = false;
	struct timespec start, end;
	int file_size;
	int ret;

	if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0))
		xerror("can not fetch start time %d", errno);

	switch (cfg_mode) {
	case CFG_MODE_POLL:
		ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out,
				     winfo);
		break;

	case CFG_MODE_MMAP:
		file_size = get_infd_size(infd);
		if (file_size < 0)
			return file_size;
		ret = copyfd_io_mmap(infd, peerfd, outfd, file_size,
				     &in_closed_after_out, winfo);
		break;

	case CFG_MODE_SENDFILE:
		file_size = get_infd_size(infd);
		if (file_size < 0)
			return file_size;
		ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size,
					 &in_closed_after_out, winfo);
		break;

	default:
		fprintf(stderr, "Invalid mode %d\n", cfg_mode);

		die_usage();
		return 1;
	}

	if (ret)
		return ret;

	if (close_peerfd)
		close(peerfd);

	if (cfg_time) {
		unsigned int delta_ms;

		if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
			xerror("can not fetch end time %d", errno);
		delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
		if (delta_ms > cfg_time) {
			xerror("transfer slower than expected! runtime %u ms, expected %u ms",
			       delta_ms, cfg_time);
		}

		/* show the runtime only if this end shutdown(wr) before receiving the EOF,
		 * (that is, if this end got the longer runtime)
		 */
		if (in_closed_after_out)
			fprintf(stderr, "%u", delta_ms);
	}

	return 0;
}

/* Report on stderr any inconsistency in the address returned by accept(). */
static void check_sockaddr(int pf, struct sockaddr_storage *ss,
			   socklen_t salen)
{
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	socklen_t wanted_size = 0;

	switch (pf) {
	case AF_INET:
		wanted_size = sizeof(*sin);
		sin = (void *)ss;
		if (!sin->sin_port)
			fprintf(stderr, "accept: something wrong: ip connection from port 0\n");
		break;
	case AF_INET6:
		wanted_size = sizeof(*sin6);
		sin6 = (void *)ss;
		if (!sin6->sin6_port)
			fprintf(stderr, "accept: something wrong: ipv6 connection from port 0\n");
		break;
	default:
		fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen);
		return;
	}

	if (salen != wanted_size)
		fprintf(stderr, "accept: size mismatch, got %d expected %d\n",
			(int)salen, (int)wanted_size);

	if (ss->ss_family != pf)
		fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n",
			pf, (int)ss->ss_family);
}

/* Report on stderr if getpeername() disagrees with the accept() address. */
static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen)
{
	struct sockaddr_storage peerss;
	socklen_t peersalen = sizeof(peerss);

	if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) {
		perror("getpeername");
		return;
	}

	if (peersalen != salen) {
		fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen);
		return;
	}

	if (memcmp(ss, &peerss, peersalen)) {
		char a[INET6_ADDRSTRLEN];
		char b[INET6_ADDRSTRLEN];
		char c[INET6_ADDRSTRLEN];
		char d[INET6_ADDRSTRLEN];

		xgetnameinfo((struct sockaddr *)ss, salen,
			     a, sizeof(a), b, sizeof(b));

		xgetnameinfo((struct sockaddr *)&peerss, peersalen,
			     c, sizeof(c), d, sizeof(d));

		fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n",
			__func__, a, c, b, d, peersalen, salen);
	}
}

/* Report on stderr if getpeername() disagrees with the requested host/port. */
static void check_getpeername_connect(int fd)
{
	struct sockaddr_storage ss;
	socklen_t salen = sizeof(ss);
	char a[INET6_ADDRSTRLEN];
	char b[INET6_ADDRSTRLEN];

	if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
		perror("getpeername");
		return;
	}

	xgetnameinfo((struct sockaddr *)&ss, salen,
		     a, sizeof(a), b, sizeof(b));

	if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
		fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
			cfg_host, a, cfg_port, b);
}

/*
 * Randomly close 'fd', unless the join, remove or repeat options are in use.
 */
static void maybe_close(int fd)
{
	unsigned int r = rand();

	if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
		close(fd);
}

/*
 * Server loop: accept cfg_repeat connections on 'listensock' and run one
 * transfer on each of them.
 * Returns 0 on success, non-zero on poll/accept failure or timeout.
 */
int main_loop_s(int listensock)
{
	struct sockaddr_storage ss;
	struct wstate winfo;
	struct pollfd polls;
	socklen_t salen;
	int remotesock;
	int fd = 0;

again:
	polls.fd = listensock;
	polls.events = POLLIN;

	switch (poll(&polls, 1, poll_timeout)) {
	case -1:
		perror("poll");
		return 1;
	case 0:
		fprintf(stderr, "%s: timed out\n", __func__);
		close(listensock);
		return 2;
	}

	salen = sizeof(ss);
	remotesock = accept(listensock, (struct sockaddr *)&ss, &salen);
	if (remotesock >= 0) {
		maybe_close(listensock);
		check_sockaddr(pf, &ss, salen);
		check_getpeername(remotesock, &ss, salen);

		if (cfg_input) {
			fd = open(cfg_input, O_RDONLY);
			if (fd < 0)
				xerror("can't open %s: %d", cfg_input, errno);
		}

		SOCK_TEST_TCPULP(remotesock, 0);

		memset(&winfo, 0, sizeof(winfo));
		copyfd_io(fd, remotesock, 1, true, &winfo);
	} else {
		perror("accept");
		return 1;
	}

	if (--cfg_repeat > 0) {
		if (cfg_input)
			close(fd);
		goto again;
	}

	return 0;
}

/*
 * Seed rand() from /dev/urandom, falling back to a time/pid based seed when
 * the device can't be read.
 */
static void init_rng(void)
{
	unsigned int seed = (unsigned int)time(NULL) ^ (unsigned int)getpid();
	int fd = open("/dev/urandom", O_RDONLY);

	if (fd >= 0) {
		unsigned int rnd;

		if (read(fd, &rnd, sizeof(rnd)) == (ssize_t)sizeof(rnd))
			seed = rnd;
		close(fd);
	}

	srand(seed);
}

/* setsockopt() wrapper; exits on failure. */
static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
{
	int err;

	err = setsockopt(fd, level, optname, optval, optlen);
	if (err) {
		perror("setsockopt");
		exit(1);
	}
}

/* Enable on 'fd' the socket options producing the requested cmsg types. */
static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
{
	static const unsigned int on = 1;

	if (cmsg->timestampns)
		xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
	if (cmsg->tcp_inq)
		xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on));
}

/*
 * Parse the comma separated -c argument, recursing on the text following each
 * comma. Exits on unknown names.
 */
static void parse_cmsg_types(const char *type)
{
	char *next = strchr(type, ',');
	unsigned int len = 0;

	cfg_cmsg_types.cmsg_enabled = 1;

	if (next) {
		parse_cmsg_types(next + 1);
		len = next - type;
	} else {
		len = strlen(type);
	}

	if (strncmp(type, "TIMESTAMPNS", len) == 0) {
		cfg_cmsg_types.timestampns = 1;
		return;
	}

	if (strncmp(type, "TCPINQ", len) == 0) {
		cfg_cmsg_types.tcp_inq = 1;
		return;
	}

	fprintf(stderr, "Unrecognized cmsg option %s\n", type);
	exit(1);
}

/*
 * Parse the comma separated -o argument, recursing on the text following each
 * comma. Exits on unknown names.
 */
static void parse_setsock_options(const char *name)
{
	char *next = strchr(name, ',');
	unsigned int len = 0;

	if (next) {
		parse_setsock_options(next + 1);
		len = next - name;
	} else {
		len = strlen(name);
	}

	if (strncmp(name, "TRANSPARENT", len) == 0) {
		cfg_sockopt_types.transparent = 1;
		return;
	}

	if (strncmp(name, "MPTFO", len) == 0) {
		cfg_sockopt_types.mptfo = 1;
		return;
	}

	fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
	exit(1);
}

/*
 * Shut down the write side of 'fd', wait for the output queue to drain and
 * then disconnect the socket by connecting it to an AF_UNSPEC address of
 * 'addrlen' bytes. Any failure is fatal.
 */
void xdisconnect(int fd, int addrlen)
{
	struct sockaddr_storage empty;
	int msec_sleep = 10;
	int queued = 1;
	int i;

	shutdown(fd, SHUT_WR);

	/* while until the pending data is completely flushed, the later
	 * disconnect will bypass/ignore/drop any pending data.
	 */
	for (i = 0; ; i += msec_sleep) {
		if (ioctl(fd, SIOCOUTQ, &queued) < 0)
			xerror("can't query out socket queue: %d", errno);

		if (!queued)
			break;

		/* a negative poll_timeout means "no timeout" */
		if (poll_timeout >= 0 && i > poll_timeout)
			xerror("timeout while waiting for spool to complete");
		usleep(msec_sleep * 1000);
	}

	memset(&empty, 0, sizeof(empty));
	empty.ss_family = AF_UNSPEC;
	if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
		xerror("can't disconnect: %d", errno);
}

/*
 * Client loop: connect to cfg_host:cfg_port and run cfg_repeat transfers,
 * disconnecting and reconnecting the same socket between them.
 * Returns 0 on success, non-zero on failure.
 */
int main_loop(void)
{
	int fd = 0, ret, fd_in = 0;
	struct sockaddr_storage peer_addr;
	socklen_t peer_addrlen = 0;
	struct wstate winfo;

	if (cfg_input && cfg_sockopt_types.mptfo) {
		fd_in = open(cfg_input, O_RDONLY);
		if (fd_in < 0)
			xerror("can't open %s:%d", cfg_input, errno);
	}

	memset(&winfo, 0, sizeof(winfo));
	fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto,
				&peer_addr, &peer_addrlen, fd_in, &winfo);
	if (fd < 0)
		return 2;

again:
	check_getpeername_connect(fd);

	SOCK_TEST_TCPULP(fd, cfg_sock_proto);

	if (cfg_rcvbuf)
		set_rcvbuf(fd, cfg_rcvbuf);
	if (cfg_sndbuf)
		set_sndbuf(fd, cfg_sndbuf);
	if (cfg_cmsg_types.cmsg_enabled)
		apply_cmsg_types(fd, &cfg_cmsg_types);

	if (cfg_input && !cfg_sockopt_types.mptfo) {
		fd_in = open(cfg_input, O_RDONLY);
		if (fd_in < 0)
			xerror("can't open %s:%d", cfg_input, errno);
	}

	ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
	if (ret)
		return ret;

	if (cfg_truncate > 0) {
		xdisconnect(fd, peer_addrlen);
	} else if (--cfg_repeat > 0) {
		xdisconnect(fd, peer_addrlen);

		/* the socket could be unblocking at this point, we need the
		 * connect to be blocking
		 */
		set_nonblock(fd, false);
		if (connect(fd, (struct sockaddr *)&peer_addr, peer_addrlen))
			xerror("can't reconnect: %d", errno);
		if (cfg_input)
			close(fd_in);
		memset(&winfo, 0, sizeof(winfo));
		goto again;
	} else {
		close(fd);
	}

	return 0;
}

/* Map the -s argument to an IPPROTO_* value; exits on unknown names. */
int parse_proto(const char *proto)
{
	if (!strcasecmp(proto, "MPTCP"))
		return IPPROTO_MPTCP;
	if (!strcasecmp(proto, "TCP"))
		return IPPROTO_TCP;

	fprintf(stderr, "Unknown protocol: %s\n.", proto);
	die_usage();

	/* silence compiler warning */
	return 0;
}

/* Map the -m argument to an enum cfg_mode value; exits on unknown names. */
int parse_mode(const char *mode)
{
	if (!strcasecmp(mode, "poll"))
		return CFG_MODE_POLL;
	if (!strcasecmp(mode, "mmap"))
		return CFG_MODE_MMAP;
	if (!strcasecmp(mode, "sendfile"))
		return CFG_MODE_SENDFILE;

	fprintf(stderr, "Unknown test mode: %s\n", mode);
	fprintf(stderr, "Supported modes are:\n");
	fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
	fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
	fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");

	die_usage();

	/* silence compiler warning */
	return 0;
}

/* Map the -P argument to an enum cfg_peek value; exits on unknown names. */
int parse_peek(const char *mode)
{
	if (!strcasecmp(mode, "saveWithPeek"))
		return CFG_WITH_PEEK;
	if (!strcasecmp(mode, "saveAfterPeek"))
		return CFG_AFTER_PEEK;

	fprintf(stderr, "Unknown: %s\n", mode);
	fprintf(stderr, "Supported MSG_PEEK mode are:\n");
	fprintf(stderr,
		"\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n");
	fprintf(stderr,
		"\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n");

	die_usage();

	/* silence compiler warning */
	return 0;
}

/*
 * Parse a buffer size (-S / -R) in the range 0..INT_MAX; the base is
 * auto-detected by strtoul(). Exits through die_usage() on invalid input.
 */
static int parse_int(const char *size)
{
	unsigned long s;
	char *end;

	errno = 0;

	s = strtoul(size, &end, 0);

	/* reject empty input and trailing garbage */
	if (!errno && (end == size || *end != '\0'))
		errno = EINVAL;

	if (errno) {
		fprintf(stderr, "Invalid buffer size %s (%s)\n",
			size, strerror(errno));
		die_usage();
	}

	if (s > INT_MAX) {
		fprintf(stderr, "Invalid buffer size %s (%s)\n",
			size, strerror(ERANGE));
		die_usage();
	}

	return (int)s;
}

/*
 * Parse the command line into the cfg_* globals. Exactly one positional
 * argument (the address) is required.
 */
static void parse_opts(int argc, char **argv)
{
	int val;
	int c;

	while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
		switch (c) {
		case 'f':
			cfg_truncate = atoi(optarg);

			/* when receiving a fastclose, ignore PIPE signals and
			 * all the I/O errors later in the code
			 */
			if (cfg_truncate < 0) {
				cfg_rcv_trunc = true;
				signal(SIGPIPE, handle_signal);
			}
			break;
		case 'j':
			cfg_join = true;
			cfg_mode = CFG_MODE_POLL;
			break;
		case 'r':
			cfg_remove = true;
			cfg_mode = CFG_MODE_POLL;
			cfg_wait = 400000;
			/* validate as signed before storing in the unsigned
			 * limit, so that negative values get the default too
			 */
			val = atoi(optarg);
			cfg_do_w = val > 0 ? val : 50;
			break;
		case 'i':
			cfg_input = optarg;
			break;
		case 'I':
			cfg_repeat = atoi(optarg);
			break;
		case 'l':
			listen_mode = true;
			break;
		case 'p':
			cfg_port = optarg;
			break;
		case 's':
			cfg_sock_proto = parse_proto(optarg);
			break;
		case 'h':
			die_usage();
			break;
		case '6':
			pf = AF_INET6;
			break;
		case 't':
			poll_timeout = atoi(optarg) * 1000;
			if (poll_timeout <= 0)
				poll_timeout = -1;
			break;
		case 'T':
			cfg_time = atoi(optarg);
			break;
		case 'm':
			cfg_mode = parse_mode(optarg);
			break;
		case 'S':
			cfg_sndbuf = parse_int(optarg);
			break;
		case 'R':
			cfg_rcvbuf = parse_int(optarg);
			break;
		case 'w':
			cfg_wait = atoi(optarg)*1000000;
			break;
		case 'M':
			cfg_mark = strtol(optarg, NULL, 0);
			break;
		case 'P':
			cfg_peek = parse_peek(optarg);
			break;
		case 'c':
			parse_cmsg_types(optarg);
			break;
		case 'o':
			parse_setsock_options(optarg);
			break;
		}
	}

	if (optind + 1 != argc)
		die_usage();
	cfg_host = argv[optind];

	if (strchr(cfg_host, ':'))
		pf = AF_INET6;
}

int main(int argc, char *argv[])
{
	init_rng();

	signal(SIGUSR1, handle_signal);
	parse_opts(argc, argv);

	if (listen_mode) {
		int fd = sock_listen_mptcp(cfg_host, cfg_port);

		if (fd < 0)
			return 1;

		if (cfg_rcvbuf)
			set_rcvbuf(fd, cfg_rcvbuf);
		if (cfg_sndbuf)
			set_sndbuf(fd, cfg_sndbuf);
		if (cfg_mark)
			set_mark(fd, cfg_mark);
		if (cfg_cmsg_types.cmsg_enabled)
			apply_cmsg_types(fd, &cfg_cmsg_types);

		return main_loop_s(fd);
	}

	return main_loop();
}