1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <limits.h>
7 #include <fcntl.h>
8 #include <string.h>
9 #include <stdarg.h>
10 #include <stdbool.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <strings.h>
15 #include <signal.h>
16 #include <unistd.h>
17 #include <time.h>
18 
19 #include <sys/ioctl.h>
20 #include <sys/poll.h>
21 #include <sys/sendfile.h>
22 #include <sys/stat.h>
23 #include <sys/socket.h>
24 #include <sys/types.h>
25 #include <sys/mman.h>
26 
27 #include <netdb.h>
28 #include <netinet/in.h>
29 
30 #include <linux/tcp.h>
31 #include <linux/time_types.h>
32 #include <linux/sockios.h>
33 
/* getopt(3) index of the next argv element to be processed */
extern int optind;

/* Fallback values, used only when the included headers do not already
 * define these constants.
 */
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
#ifndef TCP_ULP
#define TCP_ULP 31
#endif
42 
/* Timeout handed to poll(); also bounds the flush wait in xdisconnect() */
static int  poll_timeout = 10 * 1000;
/* Selects the receive-first ordering in copyfd_io_mmap()/copyfd_io_sendfile() */
static bool listen_mode;
/* Set by handle_signal() */
static bool quit;

/* Transfer strategy dispatched by copyfd_io() */
enum cfg_mode {
	CFG_MODE_POLL,
	CFG_MODE_MMAP,
	CFG_MODE_SENDFILE,
};

/* How do_rnd_read() combines recv(MSG_PEEK) with the actual read */
enum cfg_peek {
	CFG_NONE_PEEK,
	CFG_WITH_PEEK,
	CFG_AFTER_PEEK,
};

static enum cfg_mode cfg_mode = CFG_MODE_POLL;
static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
/* Remote address and port handed to sock_connect_mptcp() by main_loop() */
static const char *cfg_host;
static const char *cfg_port	= "12000";
/* Protocol argument passed to socket() */
static int cfg_sock_proto	= IPPROTO_MPTCP;
/* Address family used for the getaddrinfo() hints */
static int pf = AF_INET;
/* When non-zero, applied by main_loop() through set_sndbuf()/set_rcvbuf() */
static int cfg_sndbuf;
static int cfg_rcvbuf;
/* Caps the first write at 100 bytes and sleeps after it (see do_rnd_write()) */
static bool cfg_join;
/* Caps every write at cfg_do_w bytes and sleeps after each (see do_rnd_write()) */
static bool cfg_remove;
/* When non-zero, upper bound in ms on the transfer time checked by copyfd_io() */
static unsigned int cfg_time;
/* Per-write byte limit used while cfg_remove is set */
static unsigned int cfg_do_w;
/* Delay passed to usleep() before shutdown(SHUT_WR) in shut_wr() */
static int cfg_wait;
/* When non-zero, SO_MARK value set on the client socket */
static uint32_t cfg_mark;
/* Path of the file to send; when NULL, file descriptor 0 is used */
static char *cfg_input;
/* Remaining number of transfers, decremented by main_loop()/main_loop_s() */
static int cfg_repeat = 1;
/* Value of -f; when positive, copyfd_io_poll() stops after that many bytes */
static int cfg_truncate;
/* Set when -f is negative: I/O errors in copyfd_io_poll() are tolerated */
static int cfg_rcv_trunc;
77 
/* Control-message checks requested on the command line; filled in by
 * parse_cmsg_types().
 */
struct cfg_cmsg_types {
	unsigned int cmsg_enabled:1;	/* parse_cmsg_types() was invoked */
	unsigned int timestampns:1;	/* "TIMESTAMPNS" requested */
	unsigned int tcp_inq:1;		/* "TCPINQ" requested */
};

/* Socket options requested on the command line; filled in by
 * parse_setsock_options().
 */
struct cfg_sockopt_types {
	unsigned int transparent:1;	/* "TRANSPARENT" requested */
};

/* State carried between successive do_recvmsg_cmsg() calls */
struct tcp_inq_state {
	unsigned int last;	/* most recent TCP_CM_INQ value seen by process_cmsg() */
	bool expect_eof;	/* the next receive must report EOF */
};

static struct tcp_inq_state tcp_inq;

static struct cfg_cmsg_types cfg_cmsg_types;
static struct cfg_sockopt_types cfg_sockopt_types;
97 
/* Print the command line synopsis and the per-option help to stderr, then
 * terminate the process with exit status 1. Never returns.
 */
static void die_usage(void)
{
	fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] "
		"[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] "
		"[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n");
	fprintf(stderr, "\t-6 use ipv6\n");
	fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
	fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount "
		"of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP "
		"fastclose at close/shutdown. If offset is negative, expect the peer to close before "
		"all the local data has been sent, thus tolerating errors on write and EPIPE signals\n");
	fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin\n");
	fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
		"incoming connections, in client mode, disconnect and reconnect to the server\n");
	fprintf(stderr, "\t-j     -- add additional sleep at connection start and tear down "
		"-- for MPJ tests\n");
	fprintf(stderr, "\t-l     -- listens mode, accepts incoming connection\n");
	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
	fprintf(stderr, "\t-o option -- test sockopt <option>\n");
	fprintf(stderr, "\t-p num -- use port num\n");
	fprintf(stderr,
		"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK from tcp socket\n");
	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
	fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
	fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
		"-- for remove addr tests\n");
	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
	fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
	exit(1);
}
131 
/* Emit a printf-style message on stderr and terminate the process with
 * exit status 1.
 */
static void xerror(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);

	exit(1);
}
141 
142 static void handle_signal(int nr)
143 {
144 	quit = true;
145 }
146 
/* Translate a getaddrinfo()/getnameinfo() error code into a message.
 * EAI_SYSTEM means the real cause is stored in errno.
 */
static const char *getxinfo_strerr(int err)
{
	return err == EAI_SYSTEM ? strerror(errno) : gai_strerror(err);
}
154 
/* getnameinfo() wrapper that always asks for numeric host and service
 * strings; on failure it reports the error on stderr and exits with status 1.
 */
static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
			 char *host, socklen_t hostlen,
			 char *serv, socklen_t servlen)
{
	int rc = getnameinfo(addr, addrlen, host, hostlen, serv, servlen,
			     NI_NUMERICHOST | NI_NUMERICSERV);

	if (rc == 0)
		return;

	fprintf(stderr, "Fatal: getnameinfo: %s\n", getxinfo_strerr(rc));
	exit(1);
}
170 
/* getaddrinfo() wrapper: on failure it prints the node, the service and the
 * error description on stderr, then exits with status 1.
 */
static void xgetaddrinfo(const char *node, const char *service,
			 const struct addrinfo *hints,
			 struct addrinfo **res)
{
	int rc = getaddrinfo(node, service, hints, res);

	if (rc == 0)
		return;

	/* NULL node/service are printed as empty strings */
	fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
		node ? node : "", service ? service : "",
		getxinfo_strerr(rc));
	exit(1);
}
185 
/* Set SO_RCVBUF on 'fd' to 'size'; exits with status 1 on failure. */
static void set_rcvbuf(int fd, unsigned int size)
{
	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) == 0)
		return;

	perror("set SO_RCVBUF");
	exit(1);
}
196 
/* Set SO_SNDBUF on 'fd' to 'size'; exits with status 1 on failure. */
static void set_sndbuf(int fd, unsigned int size)
{
	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) == 0)
		return;

	perror("set SO_SNDBUF");
	exit(1);
}
207 
/* Set SO_MARK on 'fd' to 'mark'; exits with status 1 on failure. */
static void set_mark(int fd, uint32_t mark)
{
	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) == 0)
		return;

	perror("set SO_MARK");
	exit(1);
}
218 
/* Enable the transparent option matching address family 'pf' on 'fd'.
 * A failure is only reported through perror(); other families are ignored.
 */
static void set_transparent(int fd, int pf)
{
	int enable = 1;

	if (pf == AF_INET) {
		if (setsockopt(fd, SOL_IP, IP_TRANSPARENT,
			       &enable, sizeof(enable)) == -1)
			perror("IP_TRANSPARENT");
	} else if (pf == AF_INET6) {
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT,
			       &enable, sizeof(enable)) == -1)
			perror("IPV6_TRANSPARENT");
	}
}
234 
/* Try to attach the upper layer protocol 'name' to 'sock' through TCP_ULP.
 * Returns the setsockopt() result unchanged.
 */
static int do_ulp_so(int sock, const char *name)
{
	socklen_t name_len = strlen(name);

	return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, name_len);
}
239 
/* Local helper for sock_test_tcpulp(): abort through xerror(), reporting both
 * the failing line in this function and the caller's line. It relies on the
 * 'proto' and 'line' parameters being in scope and is undefined again at the
 * end of the function.
 */
#define X(m)	xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line)
/* Check the TCP_ULP behaviour of 'sock'; any unexpected result is fatal.
 *
 * @sock:  socket under test
 * @proto: protocol the caller associates with the socket (used in the checks
 *         and in the error messages)
 * @line:  caller's source line, for the error messages
 */
static void sock_test_tcpulp(int sock, int proto, unsigned int line)
{
	socklen_t buflen = 8;
	char buf[8] = "";
	int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen);

	if (ret != 0)
		X("getsockopt");

	if (buflen > 0) {
		/* a ULP name was reported: it must be "mptcp" and attaching
		 * "tls" on top of it must fail
		 */
		if (strcmp(buf, "mptcp") != 0)
			xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line);
		ret = do_ulp_so(sock, "tls");
		if (ret == 0)
			X("setsockopt");
	} else if (proto == IPPROTO_MPTCP) {
		/* no ULP name reported on an MPTCP socket: "tls" must be refused */
		ret = do_ulp_so(sock, "tls");
		if (ret != -1)
			X("setsockopt");
	}

	/* explicitly requesting the "mptcp" ULP must fail in every case */
	ret = do_ulp_so(sock, "mptcp");
	if (ret != -1)
		X("setsockopt");

#undef X
}

/* Run sock_test_tcpulp() recording the invoking source line */
#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__)
270 
/* Create a listening socket bound to listenaddr:port.
 *
 * Every address returned by the resolver is tried in turn until one can be
 * bound. The TCP_ULP checks are run after socket(), after bind() and after
 * listen().
 *
 * Returns the listening socket, or a negative value when no socket could be
 * created/bound or listen() failed.
 */
static int sock_listen_mptcp(const char * const listenaddr,
			     const char * const port)
{
	struct addrinfo hints = {
		.ai_family = pf,
		.ai_protocol = IPPROTO_TCP,
		.ai_socktype = SOCK_STREAM,
		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
	};
	struct addrinfo *res, *cur;
	int enable = 1;
	int sock = -1;

	xgetaddrinfo(listenaddr, port, &hints, &res);

	for (cur = res; cur; cur = cur->ai_next) {
		sock = socket(cur->ai_family, cur->ai_socktype, cfg_sock_proto);
		if (sock < 0)
			continue;

		SOCK_TEST_TCPULP(sock, cfg_sock_proto);

		/* a SO_REUSEADDR failure is reported but not fatal */
		if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable,
			       sizeof(enable)) == -1)
			perror("setsockopt");

		if (cfg_sockopt_types.transparent)
			set_transparent(sock, pf);

		if (bind(sock, cur->ai_addr, cur->ai_addrlen) != 0) {
			/* try the next candidate address */
			perror("bind");
			close(sock);
			sock = -1;
			continue;
		}

		break; /* bound */
	}

	freeaddrinfo(res);

	if (sock < 0) {
		fprintf(stderr, "Could not create listen socket\n");
		return sock;
	}

	SOCK_TEST_TCPULP(sock, cfg_sock_proto);

	if (listen(sock, 20) != 0) {
		perror("listen");
		close(sock);
		return -1;
	}

	SOCK_TEST_TCPULP(sock, cfg_sock_proto);

	return sock;
}
330 
/* Resolve remoteaddr:port and connect to the first address that accepts the
 * connection.
 *
 * @remoteaddr: host to connect to
 * @port:       service/port to connect to
 * @proto:      protocol passed to socket()
 * @peer:       on success, set to a description of the connected address
 *
 * The resolver list is released before returning, so *peer must not point
 * into it. The connected entry is copied into static storage instead: *peer
 * stays valid for the lifetime of the process, but is overwritten by the next
 * successful call. Only the address fields are meaningful in the copy;
 * ai_canonname and ai_next are cleared.
 *
 * Returns the connected socket, or -1 if no address could be connected.
 */
static int sock_connect_mptcp(const char * const remoteaddr,
			      const char * const port, int proto,
			      struct addrinfo **peer)
{
	static struct sockaddr_storage peer_addr;
	static struct addrinfo peer_info;
	struct addrinfo hints = {
		.ai_protocol = IPPROTO_TCP,
		.ai_socktype = SOCK_STREAM,
	};
	struct addrinfo *a, *addr;
	int sock = -1;

	hints.ai_family = pf;

	xgetaddrinfo(remoteaddr, port, &hints, &addr);
	for (a = addr; a; a = a->ai_next) {
		sock = socket(a->ai_family, a->ai_socktype, proto);
		if (sock < 0) {
			perror("socket");
			continue;
		}

		SOCK_TEST_TCPULP(sock, proto);

		if (cfg_mark)
			set_mark(sock, cfg_mark);

		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0 &&
		    a->ai_addrlen <= sizeof(peer_addr)) {
			/* keep a private copy: 'a' dies with freeaddrinfo() */
			peer_info = *a;
			memset(&peer_addr, 0, sizeof(peer_addr));
			memcpy(&peer_addr, a->ai_addr, a->ai_addrlen);
			peer_info.ai_addr = (struct sockaddr *)&peer_addr;
			peer_info.ai_canonname = NULL;
			peer_info.ai_next = NULL;
			*peer = &peer_info;
			break; /* success */
		}

		perror("connect()");
		close(sock);
		sock = -1;
	}

	freeaddrinfo(addr);
	if (sock != -1)
		SOCK_TEST_TCPULP(sock, proto);
	return sock;
}
372 
373 static size_t do_rnd_write(const int fd, char *buf, const size_t len)
374 {
375 	static bool first = true;
376 	unsigned int do_w;
377 	ssize_t bw;
378 
379 	do_w = rand() & 0xffff;
380 	if (do_w == 0 || do_w > len)
381 		do_w = len;
382 
383 	if (cfg_join && first && do_w > 100)
384 		do_w = 100;
385 
386 	if (cfg_remove && do_w > cfg_do_w)
387 		do_w = cfg_do_w;
388 
389 	bw = write(fd, buf, do_w);
390 	if (bw < 0)
391 		return bw;
392 
393 	/* let the join handshake complete, before going on */
394 	if (cfg_join && first) {
395 		usleep(200000);
396 		first = false;
397 	}
398 
399 	if (cfg_remove)
400 		usleep(200000);
401 
402 	return bw;
403 }
404 
/* Write all 'len' bytes of 'buf' to 'fd', resuming after short writes.
 *
 * Returns 'len' once everything has been written, or 0 after reporting the
 * failure with perror() if write() fails.
 */
static size_t do_write(const int fd, char *buf, const size_t len)
{
	size_t done;

	for (done = 0; done < len; ) {
		ssize_t n = write(fd, buf + done, len - done);

		if (n < 0) {
			perror("write");
			return 0;
		}

		done += (size_t)n;
	}

	return done;
}
425 
426 static void process_cmsg(struct msghdr *msgh)
427 {
428 	struct __kernel_timespec ts;
429 	bool inq_found = false;
430 	bool ts_found = false;
431 	unsigned int inq = 0;
432 	struct cmsghdr *cmsg;
433 
434 	for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
435 		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
436 			memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
437 			ts_found = true;
438 			continue;
439 		}
440 		if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
441 			memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq));
442 			inq_found = true;
443 			continue;
444 		}
445 
446 	}
447 
448 	if (cfg_cmsg_types.timestampns) {
449 		if (!ts_found)
450 			xerror("TIMESTAMPNS not present\n");
451 	}
452 
453 	if (cfg_cmsg_types.tcp_inq) {
454 		if (!inq_found)
455 			xerror("TCP_INQ not present\n");
456 
457 		if (inq > 1024)
458 			xerror("tcp_inq %u is larger than one kbyte\n", inq);
459 		tcp_inq.last = inq;
460 	}
461 }
462 
/* Receive up to 'len' bytes into 'buf' with recvmsg() and validate the
 * attached control messages against cfg_cmsg_types.
 *
 * Returns the recvmsg() result. Any inconsistency between the received data,
 * the control messages and the TCP_INQ hint of the previous call is fatal
 * (xerror()).
 */
static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
{
	char msg_buf[8192];
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = len,
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = msg_buf,
		.msg_controllen = sizeof(msg_buf),
	};
	int flags = 0;
	/* hint from the previous call; process_cmsg() below overwrites tcp_inq.last */
	unsigned int last_hint = tcp_inq.last;
	int ret = recvmsg(fd, &msg, flags);

	if (ret <= 0) {
		/* the EOF announced by the previous call */
		if (ret == 0 && tcp_inq.expect_eof)
			return ret;

		/* an EOF is only acceptable after a hint of 0 or 1 */
		if (ret == 0 && cfg_cmsg_types.tcp_inq)
			if (last_hint != 1 && last_hint != 0)
				xerror("EOF but last tcp_inq hint was %u\n", last_hint);

		return ret;
	}

	/* data arrived although the previous call predicted EOF */
	if (tcp_inq.expect_eof)
		xerror("expected EOF, last_hint %u, now %u\n",
		       last_hint, tcp_inq.last);

	/* control data must be present exactly when cmsg testing is enabled */
	if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
		xerror("got %lu bytes of cmsg data, expected 0\n",
		       (unsigned long)msg.msg_controllen);

	if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
		xerror("%s\n", "got no cmsg data");

	if (msg.msg_controllen)
		process_cmsg(&msg);

	if (cfg_cmsg_types.tcp_inq) {
		/* short read while the previous hint announced more than was
		 * returned: only tolerated when exactly one unit is left, and
		 * then the next receive must be an EOF.
		 * NOTE(review): presumably that extra unit stands for the
		 * pending end-of-stream -- confirm.
		 */
		if ((size_t)ret < len && last_hint > (unsigned int)ret) {
			if (ret + 1 != (int)last_hint) {
				/* extra read, only to enrich the error message */
				int next = read(fd, msg_buf, sizeof(msg_buf));

				xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n",
				       ret, (unsigned int)len, last_hint, tcp_inq.last, next);
			} else {
				tcp_inq.expect_eof = true;
			}
		}
	}

	return ret;
}
520 
521 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
522 {
523 	int ret = 0;
524 	char tmp[16384];
525 	size_t cap = rand();
526 
527 	cap &= 0xffff;
528 
529 	if (cap == 0)
530 		cap = 1;
531 	else if (cap > len)
532 		cap = len;
533 
534 	if (cfg_peek == CFG_WITH_PEEK) {
535 		ret = recv(fd, buf, cap, MSG_PEEK);
536 		ret = (ret < 0) ? ret : read(fd, tmp, ret);
537 	} else if (cfg_peek == CFG_AFTER_PEEK) {
538 		ret = recv(fd, buf, cap, MSG_PEEK);
539 		ret = (ret < 0) ? ret : read(fd, buf, cap);
540 	} else if (cfg_cmsg_types.cmsg_enabled) {
541 		ret = do_recvmsg_cmsg(fd, buf, cap);
542 	} else {
543 		ret = read(fd, buf, cap);
544 	}
545 
546 	return ret;
547 }
548 
/* Set or clear O_NONBLOCK on 'fd', leaving the other file status flags
 * untouched. Does nothing if the current flags cannot be fetched.
 */
static void set_nonblock(int fd, bool nonblock)
{
	int cur = fcntl(fd, F_GETFL);
	int wanted;

	if (cur == -1)
		return;

	wanted = nonblock ? (cur | O_NONBLOCK) : (cur & ~O_NONBLOCK);
	fcntl(fd, F_SETFL, wanted);
}
561 
562 static void shut_wr(int fd)
563 {
564 	/* Close our write side, ev. give some time
565 	 * for address notification and/or checking
566 	 * the current status
567 	 */
568 	if (cfg_wait)
569 		usleep(cfg_wait);
570 
571 	shutdown(fd, SHUT_WR);
572 }
573 
574 static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
575 {
576 	struct pollfd fds = {
577 		.fd = peerfd,
578 		.events = POLLIN | POLLOUT,
579 	};
580 	unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0;
581 	char wbuf[8192];
582 
583 	set_nonblock(peerfd, true);
584 
585 	for (;;) {
586 		char rbuf[8192];
587 		ssize_t len;
588 
589 		if (fds.events == 0)
590 			break;
591 
592 		switch (poll(&fds, 1, poll_timeout)) {
593 		case -1:
594 			if (errno == EINTR)
595 				continue;
596 			perror("poll");
597 			return 1;
598 		case 0:
599 			fprintf(stderr, "%s: poll timed out (events: "
600 				"POLLIN %u, POLLOUT %u)\n", __func__,
601 				fds.events & POLLIN, fds.events & POLLOUT);
602 			return 2;
603 		}
604 
605 		if (fds.revents & POLLIN) {
606 			ssize_t rb = sizeof(rbuf);
607 
608 			/* limit the total amount of read data to the trunc value*/
609 			if (cfg_truncate > 0) {
610 				if (rb + total_rlen > cfg_truncate)
611 					rb = cfg_truncate - total_rlen;
612 				len = read(peerfd, rbuf, rb);
613 			} else {
614 				len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
615 			}
616 			if (len == 0) {
617 				/* no more data to receive:
618 				 * peer has closed its write side
619 				 */
620 				fds.events &= ~POLLIN;
621 
622 				if ((fds.events & POLLOUT) == 0) {
623 					*in_closed_after_out = true;
624 					/* and nothing more to send */
625 					break;
626 				}
627 
628 			/* Else, still have data to transmit */
629 			} else if (len < 0) {
630 				if (cfg_rcv_trunc)
631 					return 0;
632 				perror("read");
633 				return 3;
634 			}
635 
636 			total_rlen += len;
637 			do_write(outfd, rbuf, len);
638 		}
639 
640 		if (fds.revents & POLLOUT) {
641 			if (wlen == 0) {
642 				woff = 0;
643 				wlen = read(infd, wbuf, sizeof(wbuf));
644 			}
645 
646 			if (wlen > 0) {
647 				ssize_t bw;
648 
649 				/* limit the total amount of written data to the trunc value */
650 				if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate)
651 					wlen = cfg_truncate - total_wlen;
652 
653 				bw = do_rnd_write(peerfd, wbuf + woff, wlen);
654 				if (bw < 0) {
655 					if (cfg_rcv_trunc)
656 						return 0;
657 					perror("write");
658 					return 111;
659 				}
660 
661 				woff += bw;
662 				wlen -= bw;
663 				total_wlen += bw;
664 			} else if (wlen == 0) {
665 				/* We have no more data to send. */
666 				fds.events &= ~POLLOUT;
667 
668 				if ((fds.events & POLLIN) == 0)
669 					/* ... and peer also closed already */
670 					break;
671 
672 				shut_wr(peerfd);
673 			} else {
674 				if (errno == EINTR)
675 					continue;
676 				perror("read");
677 				return 4;
678 			}
679 		}
680 
681 		if (fds.revents & (POLLERR | POLLNVAL)) {
682 			if (cfg_rcv_trunc)
683 				return 0;
684 			fprintf(stderr, "Unexpected revents: "
685 				"POLLERR/POLLNVAL(%x)\n", fds.revents);
686 			return 5;
687 		}
688 
689 		if (cfg_truncate > 0 && total_wlen >= cfg_truncate &&
690 		    total_rlen >= cfg_truncate)
691 			break;
692 	}
693 
694 	/* leave some time for late join/announce */
695 	if (cfg_remove)
696 		usleep(cfg_wait);
697 
698 	return 0;
699 }
700 
/* Copy everything readable from 'infd' (through do_rnd_read()) to 'outfd'
 * until EOF, a read error or an incomplete write.
 *
 * Returns the last read result converted to int: 0 on EOF, negative after a
 * read error (reported with perror()), positive when a write was incomplete.
 */
static int do_recvfile(int infd, int outfd)
{
	ssize_t r;

	for (;;) {
		char chunk[16384];

		r = do_rnd_read(infd, chunk, sizeof(chunk));
		if (r < 0)
			perror("read");
		if (r <= 0)
			break;

		if (write(outfd, chunk, r) != r)
			break;
	}

	return (int)r;
}
719 
/* Send the first 'size' bytes of 'infd' to 'outfd' by mapping the input and
 * writing the mapping out.
 *
 * @infd:  file descriptor to map
 * @outfd: destination file descriptor
 * @size:  number of bytes to send
 *
 * Returns 0 when everything was written, 1 if the mapping failed, otherwise
 * the number of bytes that could not be written.
 */
static int do_mmap(int infd, int outfd, unsigned int size)
{
	size_t rem = size;
	size_t off = 0;
	char *inbuf;

	/* mmap() rejects a zero length with EINVAL: an empty input simply
	 * has nothing to send and is not an error
	 */
	if (size == 0)
		return 0;

	inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
	if (inbuf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	while (rem > 0) {
		ssize_t ret = write(outfd, inbuf + off, rem);

		if (ret < 0) {
			perror("write");
			break;
		}

		off += ret;
		rem -= ret;
	}

	munmap(inbuf, size);
	return rem;
}
748 
/* Return the size in bytes of the regular file behind 'fd'.
 *
 * Returns the size on success, -1 if fstat() fails, -2 if 'fd' is not a
 * regular file and -3 if the size does not fit in an int.
 */
static int get_infd_size(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0) {
		perror("fstat");
		return -1;
	}

	if (!S_ISREG(sb.st_mode)) {
		fprintf(stderr, "%s: stdin is not a regular file\n", __func__);
		return -2;
	}

	if (sb.st_size > INT_MAX) {
		/* st_size is signed: print it through a signed conversion */
		fprintf(stderr, "File too large: %lld\n", (long long)sb.st_size);
		return -3;
	}

	return (int)sb.st_size;
}
774 
/* Send 'count' bytes from 'infd' to 'outfd' with sendfile(), starting at the
 * current file offset of 'infd'.
 *
 * Returns 0 once 'count' bytes have been sent, 3 if sendfile() fails or hits
 * the end of the input before 'count' bytes were transferred.
 */
static int do_sendfile(int infd, int outfd, unsigned int count)
{
	while (count > 0) {
		ssize_t r;

		r = sendfile(outfd, infd, NULL, count);
		if (r < 0) {
			perror("sendfile");
			return 3;
		}

		/* 0 means end of input: without this check a file shorter
		 * than 'count' would make the loop spin forever
		 */
		if (r == 0) {
			fprintf(stderr, "sendfile: unexpected EOF, %u bytes left\n",
				count);
			return 3;
		}

		count -= r;
	}

	return 0;
}
791 
792 static int copyfd_io_mmap(int infd, int peerfd, int outfd,
793 			  unsigned int size, bool *in_closed_after_out)
794 {
795 	int err;
796 
797 	if (listen_mode) {
798 		err = do_recvfile(peerfd, outfd);
799 		if (err)
800 			return err;
801 
802 		err = do_mmap(infd, peerfd, size);
803 	} else {
804 		err = do_mmap(infd, peerfd, size);
805 		if (err)
806 			return err;
807 
808 		shut_wr(peerfd);
809 
810 		err = do_recvfile(peerfd, outfd);
811 		*in_closed_after_out = true;
812 	}
813 
814 	return err;
815 }
816 
817 static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
818 			      unsigned int size, bool *in_closed_after_out)
819 {
820 	int err;
821 
822 	if (listen_mode) {
823 		err = do_recvfile(peerfd, outfd);
824 		if (err)
825 			return err;
826 
827 		err = do_sendfile(infd, peerfd, size);
828 	} else {
829 		err = do_sendfile(infd, peerfd, size);
830 		if (err)
831 			return err;
832 
833 		shut_wr(peerfd);
834 
835 		err = do_recvfile(peerfd, outfd);
836 		*in_closed_after_out = true;
837 	}
838 
839 	return err;
840 }
841 
842 static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
843 {
844 	bool in_closed_after_out = false;
845 	struct timespec start, end;
846 	int file_size;
847 	int ret;
848 
849 	if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0))
850 		xerror("can not fetch start time %d", errno);
851 
852 	switch (cfg_mode) {
853 	case CFG_MODE_POLL:
854 		ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out);
855 		break;
856 
857 	case CFG_MODE_MMAP:
858 		file_size = get_infd_size(infd);
859 		if (file_size < 0)
860 			return file_size;
861 		ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out);
862 		break;
863 
864 	case CFG_MODE_SENDFILE:
865 		file_size = get_infd_size(infd);
866 		if (file_size < 0)
867 			return file_size;
868 		ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out);
869 		break;
870 
871 	default:
872 		fprintf(stderr, "Invalid mode %d\n", cfg_mode);
873 
874 		die_usage();
875 		return 1;
876 	}
877 
878 	if (ret)
879 		return ret;
880 
881 	if (close_peerfd)
882 		close(peerfd);
883 
884 	if (cfg_time) {
885 		unsigned int delta_ms;
886 
887 		if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
888 			xerror("can not fetch end time %d", errno);
889 		delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
890 		if (delta_ms > cfg_time) {
891 			xerror("transfer slower than expected! runtime %d ms, expected %d ms",
892 			       delta_ms, cfg_time);
893 		}
894 
895 		/* show the runtime only if this end shutdown(wr) before receiving the EOF,
896 		 * (that is, if this end got the longer runtime)
897 		 */
898 		if (in_closed_after_out)
899 			fprintf(stderr, "%d", delta_ms);
900 	}
901 
902 	return 0;
903 }
904 
/* Sanity check the address returned by accept(); problems are only reported
 * on stderr.
 *
 * @pf:    expected address family
 * @ss:    address filled in by accept()
 * @salen: address length reported by accept()
 *
 * Reports a zero source port, a length that does not match the sockaddr type
 * of 'pf', and a family different from 'pf'.
 */
static void check_sockaddr(int pf, struct sockaddr_storage *ss,
			   socklen_t salen)
{
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	socklen_t wanted_size = 0;

	switch (pf) {
	case AF_INET:
		wanted_size = sizeof(*sin);
		sin = (void *)ss;
		if (!sin->sin_port)
			fprintf(stderr, "accept: something wrong: ip connection from port 0\n");
		break;
	case AF_INET6:
		wanted_size = sizeof(*sin6);
		sin6 = (void *)ss;
		if (!sin6->sin6_port)
			fprintf(stderr, "accept: something wrong: ipv6 connection from port 0\n");
		break;
	default:
		fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen);
		return;
	}

	if (salen != wanted_size)
		fprintf(stderr, "accept: size mismatch, got %d expected %d\n",
			(int)salen, (int)wanted_size);

	/* the expected family is 'pf', the observed one is ss_family */
	if (ss->ss_family != pf)
		fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n",
			pf, (int)ss->ss_family);
}
938 
/* Compare the address reported by accept() ('ss'/'salen') with the one
 * returned by getpeername() on 'fd'. Any difference is only reported on
 * stderr.
 */
static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen)
{
	struct sockaddr_storage peerss;
	socklen_t peersalen = sizeof(peerss);
	char acc_host[INET6_ADDRSTRLEN];
	char acc_serv[INET6_ADDRSTRLEN];
	char peer_host[INET6_ADDRSTRLEN];
	char peer_serv[INET6_ADDRSTRLEN];

	if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) {
		perror("getpeername");
		return;
	}

	if (peersalen != salen) {
		fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen);
		return;
	}

	if (memcmp(ss, &peerss, peersalen) == 0)
		return;

	/* the two addresses differ: print both in numeric form */
	xgetnameinfo((struct sockaddr *)ss, salen,
		     acc_host, sizeof(acc_host), acc_serv, sizeof(acc_serv));

	xgetnameinfo((struct sockaddr *)&peerss, peersalen,
		     peer_host, sizeof(peer_host), peer_serv, sizeof(peer_serv));

	fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n",
		__func__, acc_host, peer_host, acc_serv, peer_serv, peersalen, salen);
}
970 
971 static void check_getpeername_connect(int fd)
972 {
973 	struct sockaddr_storage ss;
974 	socklen_t salen = sizeof(ss);
975 	char a[INET6_ADDRSTRLEN];
976 	char b[INET6_ADDRSTRLEN];
977 
978 	if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
979 		perror("getpeername");
980 		return;
981 	}
982 
983 	xgetnameinfo((struct sockaddr *)&ss, salen,
984 		     a, sizeof(a), b, sizeof(b));
985 
986 	if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
987 		fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
988 			cfg_host, a, cfg_port, b);
989 }
990 
991 static void maybe_close(int fd)
992 {
993 	unsigned int r = rand();
994 
995 	if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
996 		close(fd);
997 }
998 
999 int main_loop_s(int listensock)
1000 {
1001 	struct sockaddr_storage ss;
1002 	struct pollfd polls;
1003 	socklen_t salen;
1004 	int remotesock;
1005 	int fd = 0;
1006 
1007 again:
1008 	polls.fd = listensock;
1009 	polls.events = POLLIN;
1010 
1011 	switch (poll(&polls, 1, poll_timeout)) {
1012 	case -1:
1013 		perror("poll");
1014 		return 1;
1015 	case 0:
1016 		fprintf(stderr, "%s: timed out\n", __func__);
1017 		close(listensock);
1018 		return 2;
1019 	}
1020 
1021 	salen = sizeof(ss);
1022 	remotesock = accept(listensock, (struct sockaddr *)&ss, &salen);
1023 	if (remotesock >= 0) {
1024 		maybe_close(listensock);
1025 		check_sockaddr(pf, &ss, salen);
1026 		check_getpeername(remotesock, &ss, salen);
1027 
1028 		if (cfg_input) {
1029 			fd = open(cfg_input, O_RDONLY);
1030 			if (fd < 0)
1031 				xerror("can't open %s: %d", cfg_input, errno);
1032 		}
1033 
1034 		SOCK_TEST_TCPULP(remotesock, 0);
1035 
1036 		copyfd_io(fd, remotesock, 1, true);
1037 	} else {
1038 		perror("accept");
1039 		return 1;
1040 	}
1041 
1042 	if (--cfg_repeat > 0) {
1043 		if (cfg_input)
1044 			close(fd);
1045 		goto again;
1046 	}
1047 
1048 	return 0;
1049 }
1050 
/* Seed rand() from /dev/urandom.
 *
 * If the device cannot be opened or does not return a full seed, fall back
 * to a value derived from the current time and the process id, so that
 * srand() is never called with an uninitialized value.
 */
static void init_rng(void)
{
	unsigned int seed = (unsigned int)time(NULL) ^ (unsigned int)getpid();
	int fd = open("/dev/urandom", O_RDONLY);

	if (fd >= 0) {
		unsigned int rnd;

		if (read(fd, &rnd, sizeof(rnd)) == (ssize_t)sizeof(rnd))
			seed = rnd;
		close(fd);
	}

	srand(seed);
}
1066 
/* setsockopt() wrapper: a failure is reported with perror() and terminates
 * the process with exit status 1.
 */
static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
{
	if (setsockopt(fd, level, optname, optval, optlen) == 0)
		return;

	perror("setsockopt");
	exit(1);
}
1077 
1078 static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
1079 {
1080 	static const unsigned int on = 1;
1081 
1082 	if (cmsg->timestampns)
1083 		xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
1084 	if (cmsg->tcp_inq)
1085 		xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on));
1086 }
1087 
1088 static void parse_cmsg_types(const char *type)
1089 {
1090 	char *next = strchr(type, ',');
1091 	unsigned int len = 0;
1092 
1093 	cfg_cmsg_types.cmsg_enabled = 1;
1094 
1095 	if (next) {
1096 		parse_cmsg_types(next + 1);
1097 		len = next - type;
1098 	} else {
1099 		len = strlen(type);
1100 	}
1101 
1102 	if (strncmp(type, "TIMESTAMPNS", len) == 0) {
1103 		cfg_cmsg_types.timestampns = 1;
1104 		return;
1105 	}
1106 
1107 	if (strncmp(type, "TCPINQ", len) == 0) {
1108 		cfg_cmsg_types.tcp_inq = 1;
1109 		return;
1110 	}
1111 
1112 	fprintf(stderr, "Unrecognized cmsg option %s\n", type);
1113 	exit(1);
1114 }
1115 
1116 static void parse_setsock_options(const char *name)
1117 {
1118 	char *next = strchr(name, ',');
1119 	unsigned int len = 0;
1120 
1121 	if (next) {
1122 		parse_setsock_options(next + 1);
1123 		len = next - name;
1124 	} else {
1125 		len = strlen(name);
1126 	}
1127 
1128 	if (strncmp(name, "TRANSPARENT", len) == 0) {
1129 		cfg_sockopt_types.transparent = 1;
1130 		return;
1131 	}
1132 
1133 	fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
1134 	exit(1);
1135 }
1136 
1137 void xdisconnect(int fd, int addrlen)
1138 {
1139 	struct sockaddr_storage empty;
1140 	int msec_sleep = 10;
1141 	int queued = 1;
1142 	int i;
1143 
1144 	shutdown(fd, SHUT_WR);
1145 
1146 	/* while until the pending data is completely flushed, the later
1147 	 * disconnect will bypass/ignore/drop any pending data.
1148 	 */
1149 	for (i = 0; ; i += msec_sleep) {
1150 		if (ioctl(fd, SIOCOUTQ, &queued) < 0)
1151 			xerror("can't query out socket queue: %d", errno);
1152 
1153 		if (!queued)
1154 			break;
1155 
1156 		if (i > poll_timeout)
1157 			xerror("timeout while waiting for spool to complete");
1158 		usleep(msec_sleep * 1000);
1159 	}
1160 
1161 	memset(&empty, 0, sizeof(empty));
1162 	empty.ss_family = AF_UNSPEC;
1163 	if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
1164 		xerror("can't disconnect: %d", errno);
1165 }
1166 
1167 int main_loop(void)
1168 {
1169 	int fd, ret, fd_in = 0;
1170 	struct addrinfo *peer;
1171 
1172 	/* listener is ready. */
1173 	fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer);
1174 	if (fd < 0)
1175 		return 2;
1176 
1177 again:
1178 	check_getpeername_connect(fd);
1179 
1180 	SOCK_TEST_TCPULP(fd, cfg_sock_proto);
1181 
1182 	if (cfg_rcvbuf)
1183 		set_rcvbuf(fd, cfg_rcvbuf);
1184 	if (cfg_sndbuf)
1185 		set_sndbuf(fd, cfg_sndbuf);
1186 	if (cfg_cmsg_types.cmsg_enabled)
1187 		apply_cmsg_types(fd, &cfg_cmsg_types);
1188 
1189 	if (cfg_input) {
1190 		fd_in = open(cfg_input, O_RDONLY);
1191 		if (fd < 0)
1192 			xerror("can't open %s:%d", cfg_input, errno);
1193 	}
1194 
1195 	/* close the client socket open only if we are not going to reconnect */
1196 	ret = copyfd_io(fd_in, fd, 1, 0);
1197 	if (ret)
1198 		return ret;
1199 
1200 	if (cfg_truncate > 0) {
1201 		xdisconnect(fd, peer->ai_addrlen);
1202 	} else if (--cfg_repeat > 0) {
1203 		xdisconnect(fd, peer->ai_addrlen);
1204 
1205 		/* the socket could be unblocking at this point, we need the
1206 		 * connect to be blocking
1207 		 */
1208 		set_nonblock(fd, false);
1209 		if (connect(fd, peer->ai_addr, peer->ai_addrlen))
1210 			xerror("can't reconnect: %d", errno);
1211 		if (cfg_input)
1212 			close(fd_in);
1213 		goto again;
1214 	} else {
1215 		close(fd);
1216 	}
1217 
1218 	return 0;
1219 }
1220 
/* Map a protocol name ("MPTCP" or "TCP", case insensitive) to the matching
 * IPPROTO_* value. An unknown name is reported and die_usage() is called.
 */
int parse_proto(const char *proto)
{
	if (!strcasecmp(proto, "MPTCP"))
		return IPPROTO_MPTCP;
	if (!strcasecmp(proto, "TCP"))
		return IPPROTO_TCP;

	fprintf(stderr, "Unknown protocol: %s\n", proto);
	die_usage();

	/* silence compiler warning */
	return 0;
}
1234 
1235 int parse_mode(const char *mode)
1236 {
1237 	if (!strcasecmp(mode, "poll"))
1238 		return CFG_MODE_POLL;
1239 	if (!strcasecmp(mode, "mmap"))
1240 		return CFG_MODE_MMAP;
1241 	if (!strcasecmp(mode, "sendfile"))
1242 		return CFG_MODE_SENDFILE;
1243 
1244 	fprintf(stderr, "Unknown test mode: %s\n", mode);
1245 	fprintf(stderr, "Supported modes are:\n");
1246 	fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
1247 	fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
1248 	fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
1249 
1250 	die_usage();
1251 
1252 	/* silence compiler warning */
1253 	return 0;
1254 }
1255 
1256 int parse_peek(const char *mode)
1257 {
1258 	if (!strcasecmp(mode, "saveWithPeek"))
1259 		return CFG_WITH_PEEK;
1260 	if (!strcasecmp(mode, "saveAfterPeek"))
1261 		return CFG_AFTER_PEEK;
1262 
1263 	fprintf(stderr, "Unknown: %s\n", mode);
1264 	fprintf(stderr, "Supported MSG_PEEK mode are:\n");
1265 	fprintf(stderr,
1266 		"\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n");
1267 	fprintf(stderr,
1268 		"\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n");
1269 
1270 	die_usage();
1271 
1272 	/* silence compiler warning */
1273 	return 0;
1274 }
1275 
/* Parse a buffer-size option argument (used for both -S and -R).
 *
 * The string is converted with strtoul() in base 0, so decimal, "0x" hex
 * and leading-"0" octal notations are accepted. The argument is rejected,
 * with a message on stderr followed by die_usage(), when:
 *   - strtoul() reports an error through errno,
 *   - no digits were consumed or trailing characters follow the number,
 *   - the value does not fit in an int.
 *
 * Returns the parsed value as an int.
 */
static int parse_int(const char *size)
{
	unsigned long s;
	char *end;

	errno = 0;

	s = strtoul(size, &end, 0);

	if (errno) {
		fprintf(stderr, "Invalid buffer size %s (%s)\n",
			size, strerror(errno));
		die_usage();
	}

	/* strtoul() returns 0 without necessarily setting errno when nothing
	 * could be converted, and stops silently at the first bad character:
	 * both cases must be detected through the end pointer
	 */
	if (end == size || *end != '\0') {
		fprintf(stderr, "Invalid buffer size %s (%s)\n",
			size, strerror(EINVAL));
		die_usage();
	}

	if (s > INT_MAX) {
		fprintf(stderr, "Invalid buffer size %s (%s)\n",
			size, strerror(ERANGE));
		die_usage();
	}

	return (int)s;
}
1298 
1299 static void parse_opts(int argc, char **argv)
1300 {
1301 	int c;
1302 
1303 	while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
1304 		switch (c) {
1305 		case 'f':
1306 			cfg_truncate = atoi(optarg);
1307 
1308 			/* when receiving a fastclose, ignore PIPE signals and
1309 			 * all the I/O errors later in the code
1310 			 */
1311 			if (cfg_truncate < 0) {
1312 				cfg_rcv_trunc = true;
1313 				signal(SIGPIPE, handle_signal);
1314 			}
1315 			break;
1316 		case 'j':
1317 			cfg_join = true;
1318 			cfg_mode = CFG_MODE_POLL;
1319 			break;
1320 		case 'r':
1321 			cfg_remove = true;
1322 			cfg_mode = CFG_MODE_POLL;
1323 			cfg_wait = 400000;
1324 			cfg_do_w = atoi(optarg);
1325 			if (cfg_do_w <= 0)
1326 				cfg_do_w = 50;
1327 			break;
1328 		case 'i':
1329 			cfg_input = optarg;
1330 			break;
1331 		case 'I':
1332 			cfg_repeat = atoi(optarg);
1333 			break;
1334 		case 'l':
1335 			listen_mode = true;
1336 			break;
1337 		case 'p':
1338 			cfg_port = optarg;
1339 			break;
1340 		case 's':
1341 			cfg_sock_proto = parse_proto(optarg);
1342 			break;
1343 		case 'h':
1344 			die_usage();
1345 			break;
1346 		case '6':
1347 			pf = AF_INET6;
1348 			break;
1349 		case 't':
1350 			poll_timeout = atoi(optarg) * 1000;
1351 			if (poll_timeout <= 0)
1352 				poll_timeout = -1;
1353 			break;
1354 		case 'T':
1355 			cfg_time = atoi(optarg);
1356 			break;
1357 		case 'm':
1358 			cfg_mode = parse_mode(optarg);
1359 			break;
1360 		case 'S':
1361 			cfg_sndbuf = parse_int(optarg);
1362 			break;
1363 		case 'R':
1364 			cfg_rcvbuf = parse_int(optarg);
1365 			break;
1366 		case 'w':
1367 			cfg_wait = atoi(optarg)*1000000;
1368 			break;
1369 		case 'M':
1370 			cfg_mark = strtol(optarg, NULL, 0);
1371 			break;
1372 		case 'P':
1373 			cfg_peek = parse_peek(optarg);
1374 			break;
1375 		case 'c':
1376 			parse_cmsg_types(optarg);
1377 			break;
1378 		case 'o':
1379 			parse_setsock_options(optarg);
1380 			break;
1381 		}
1382 	}
1383 
1384 	if (optind + 1 != argc)
1385 		die_usage();
1386 	cfg_host = argv[optind];
1387 
1388 	if (strchr(cfg_host, ':'))
1389 		pf = AF_INET6;
1390 }
1391 
1392 int main(int argc, char *argv[])
1393 {
1394 	init_rng();
1395 
1396 	signal(SIGUSR1, handle_signal);
1397 	parse_opts(argc, argv);
1398 
1399 	if (listen_mode) {
1400 		int fd = sock_listen_mptcp(cfg_host, cfg_port);
1401 
1402 		if (fd < 0)
1403 			return 1;
1404 
1405 		if (cfg_rcvbuf)
1406 			set_rcvbuf(fd, cfg_rcvbuf);
1407 		if (cfg_sndbuf)
1408 			set_sndbuf(fd, cfg_sndbuf);
1409 		if (cfg_mark)
1410 			set_mark(fd, cfg_mark);
1411 		if (cfg_cmsg_types.cmsg_enabled)
1412 			apply_cmsg_types(fd, &cfg_cmsg_types);
1413 
1414 		return main_loop_s(fd);
1415 	}
1416 
1417 	return main_loop();
1418 }
1419