1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/socket.h>
6 #include <sys/ioctl.h>
7 #include <sys/select.h>
8 #include <netinet/in.h>
9 #include <arpa/inet.h>
10 #include <unistd.h>
11 #include <string.h>
12 #include <errno.h>
13 #include <stdbool.h>
14 #include <signal.h>
15 #include <fcntl.h>
16 #include <sys/wait.h>
17 #include <time.h>
18 #include <sched.h>
19 
20 #include <sys/time.h>
21 #include <sys/resource.h>
22 #include <sys/types.h>
23 #include <sys/sendfile.h>
24 
25 #include <linux/netlink.h>
26 #include <linux/socket.h>
27 #include <linux/sock_diag.h>
28 #include <linux/bpf.h>
29 #include <linux/if_link.h>
30 #include <linux/tls.h>
31 #include <assert.h>
32 #include <libgen.h>
33 
34 #include <getopt.h>
35 
36 #include <bpf/bpf.h>
37 #include <bpf/libbpf.h>
38 
39 #include "bpf_util.h"
40 #include "bpf_rlimit.h"
41 #include "cgroup_helpers.h"
42 
/* Loop condition of forever_ping_pong(): the loop repeats while non-zero. */
int running;
/* Forward declaration; the definition is not in this part of the file. */
static void running_handler(int a);

/* Fallback values for build environments whose headers lack these names. */
#ifndef TCP_ULP
#define TCP_ULP 31
#endif
#ifndef SOL_TLS
#define SOL_TLS 282
#endif

/* Loopback ports the two listening sockets are bound to (picked at random). */
#define S1_PORT 10000
#define S2_PORT 10001

/* BPF object file names; also used to label results as sockmap/sockhash. */
#define BPF_SOCKMAP_FILENAME	"test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME	"test_sockhash_kern.o"
#define CG_PATH			"/sockmap"
60 
/*
 * Global sockets, set up by sockmap_init_sockets():
 *   s1/s2 - listening sockets bound to S1_PORT/S2_PORT
 *   c1/c2 - sockets connected to s1/s2
 *   p1/p2 - connections accepted from s1/s2
 */
int s1, s2;
int c1, c2;
int p1, p2;

int test_cnt;
int passed;
int failed;

/* Map and program descriptors indexed by position; consumed by run_options(). */
int map_fd[9];
struct bpf_map *maps[9];
int prog_fd[11];

/* Per-test knobs; most are settable through long_options. */
int txmsg_pass;
int txmsg_redir;
int txmsg_drop;
int txmsg_apply;
int txmsg_cork;
int txmsg_start;
int txmsg_end;
int txmsg_start_push;
int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
int txmsg_redir_skb;
int txmsg_ktls_skb;
int txmsg_ktls_skb_drop;
int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
int skb_use_parser;
int txmsg_omit_skb_parser;
90 
91 static const struct option long_options[] = {
92 	{"help",	no_argument,		NULL, 'h' },
93 	{"cgroup",	required_argument,	NULL, 'c' },
94 	{"rate",	required_argument,	NULL, 'r' },
95 	{"verbose",	optional_argument,	NULL, 'v' },
96 	{"iov_count",	required_argument,	NULL, 'i' },
97 	{"length",	required_argument,	NULL, 'l' },
98 	{"test",	required_argument,	NULL, 't' },
99 	{"data_test",   no_argument,		NULL, 'd' },
100 	{"txmsg",		no_argument,	&txmsg_pass,  1  },
101 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
102 	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
103 	{"txmsg_apply",	required_argument,	NULL, 'a'},
104 	{"txmsg_cork",	required_argument,	NULL, 'k'},
105 	{"txmsg_start", required_argument,	NULL, 's'},
106 	{"txmsg_end",	required_argument,	NULL, 'e'},
107 	{"txmsg_start_push", required_argument,	NULL, 'p'},
108 	{"txmsg_end_push",   required_argument,	NULL, 'q'},
109 	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
110 	{"txmsg_pop",	     required_argument,	NULL, 'x'},
111 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
112 	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
113 	{"ktls", no_argument,			&ktls, 1 },
114 	{"peek", no_argument,			&peek_flag, 1 },
115 	{"txmsg_omit_skb_parser", no_argument,      &txmsg_omit_skb_parser, 1},
116 	{"whitelist", required_argument,	NULL, 'n' },
117 	{"blacklist", required_argument,	NULL, 'b' },
118 	{0, 0, NULL, 0 }
119 };
120 
/* Bookkeeping for the test currently running and the overall tallies. */
struct test_env {
	const char *type;	/* copied from sockmap_options.map */
	const char *subtest;	/* title of the running test */
	const char *prepend;	/* copied from sockmap_options.prepend */

	int test_num;		/* bumped by test_start_subtest() */
	int subtest_num;	/* bumped by test_start() */

	int succ_cnt;		/* bumped by test_pass() */
	int fail_cnt;		/* bumped by test_fail() */
	int fail_last;		/* fail_cnt snapshot taken at subtest start */
};

/* The single, zero-initialised instance used by the test_*() helpers. */
struct test_env env;
135 
/* Parameters of a single test run, handed to the individual test routines. */
struct sockmap_options {
	int verbose;		/* > 0 enables progress output, > 1 adds more */
	bool base;		/* receive on p1 rather than p2 */
	bool sendpage;		/* transmit with sendfile() rather than sendmsg() */
	bool data_test;		/* fill tx buffers with a pattern and verify on rx */
	bool drop_expected;	/* every send is expected to fail */
	bool check_recved_len;	/* rx: double the buffers and flag over-delivery */
	int iov_count;		/* iovec entries per message */
	int iov_length;		/* bytes per iovec entry */
	int rate;		/* send iterations, or ping-pong sleep in seconds */
	char *map;		/* BPF object file name in use */
	char *whitelist;
	char *blacklist;
	char *prepend;		/* printed in front of the test title in results */
};
151 
/* One test case: a printable title and the routine that runs it. */
struct _test {
	char *title;	/* reported by test_end_subtest() */
	void (*tester)(int cg_fd, struct sockmap_options *opt);	/* test body */
};
156 
157 static void test_start(void)
158 {
159 	env.subtest_num++;
160 }
161 
162 static void test_fail(void)
163 {
164 	env.fail_cnt++;
165 }
166 
167 static void test_pass(void)
168 {
169 	env.succ_cnt++;
170 }
171 
172 static void test_reset(void)
173 {
174 	txmsg_start = txmsg_end = 0;
175 	txmsg_start_pop = txmsg_pop = 0;
176 	txmsg_start_push = txmsg_end_push = 0;
177 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
178 	txmsg_apply = txmsg_cork = 0;
179 	txmsg_ingress = txmsg_redir_skb = 0;
180 	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
181 	txmsg_omit_skb_parser = 0;
182 	skb_use_parser = 0;
183 }
184 
185 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
186 {
187 	env.type = o->map;
188 	env.subtest = t->title;
189 	env.prepend = o->prepend;
190 	env.test_num++;
191 	env.subtest_num = 0;
192 	env.fail_last = env.fail_cnt;
193 	test_reset();
194 	return 0;
195 }
196 
197 static void test_end_subtest(void)
198 {
199 	int error = env.fail_cnt - env.fail_last;
200 	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
201 
202 	if (!error)
203 		test_pass();
204 
205 	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
206 		env.test_num, env.subtest_num,
207 		!type ? "sockmap" : "sockhash",
208 		env.prepend ? : "",
209 		env.subtest, error ? "FAIL" : "OK");
210 }
211 
212 static void test_print_results(void)
213 {
214 	fprintf(stdout, "Pass: %d Fail: %d\n",
215 		env.succ_cnt, env.fail_cnt);
216 }
217 
218 static void usage(char *argv[])
219 {
220 	int i;
221 
222 	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
223 	printf(" options:\n");
224 	for (i = 0; long_options[i].name != 0; i++) {
225 		printf(" --%-12s", long_options[i].name);
226 		if (long_options[i].flag != NULL)
227 			printf(" flag (internal value:%d)\n",
228 				*long_options[i].flag);
229 		else
230 			printf(" -%c\n", long_options[i].val);
231 	}
232 	printf("\n");
233 }
234 
235 char *sock_to_string(int s)
236 {
237 	if (s == c1)
238 		return "client1";
239 	else if (s == c2)
240 		return "client2";
241 	else if (s == s1)
242 		return "server1";
243 	else if (s == s2)
244 		return "server2";
245 	else if (s == p1)
246 		return "peer1";
247 	else if (s == p2)
248 		return "peer2";
249 	else
250 		return "unknown";
251 }
252 
253 static int sockmap_init_ktls(int verbose, int s)
254 {
255 	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
256 		.info = {
257 			.version     = TLS_1_2_VERSION,
258 			.cipher_type = TLS_CIPHER_AES_GCM_128,
259 		},
260 	};
261 	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
262 		.info = {
263 			.version     = TLS_1_2_VERSION,
264 			.cipher_type = TLS_CIPHER_AES_GCM_128,
265 		},
266 	};
267 	int so_buf = 6553500;
268 	int err;
269 
270 	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
271 	if (err) {
272 		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
273 		return -EINVAL;
274 	}
275 	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
276 	if (err) {
277 		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
278 		return -EINVAL;
279 	}
280 	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
281 	if (err) {
282 		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
283 		return -EINVAL;
284 	}
285 	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
286 	if (err) {
287 		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
288 		return -EINVAL;
289 	}
290 	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
291 	if (err) {
292 		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
293 		return -EINVAL;
294 	}
295 
296 	if (verbose)
297 		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
298 	return 0;
299 }
300 static int sockmap_init_sockets(int verbose)
301 {
302 	int i, err, one = 1;
303 	struct sockaddr_in addr;
304 	int *fds[4] = {&s1, &s2, &c1, &c2};
305 
306 	s1 = s2 = p1 = p2 = c1 = c2 = 0;
307 
308 	/* Init sockets */
309 	for (i = 0; i < 4; i++) {
310 		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
311 		if (*fds[i] < 0) {
312 			perror("socket s1 failed()");
313 			return errno;
314 		}
315 	}
316 
317 	/* Allow reuse */
318 	for (i = 0; i < 2; i++) {
319 		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
320 				 (char *)&one, sizeof(one));
321 		if (err) {
322 			perror("setsockopt failed()");
323 			return errno;
324 		}
325 	}
326 
327 	/* Non-blocking sockets */
328 	for (i = 0; i < 2; i++) {
329 		err = ioctl(*fds[i], FIONBIO, (char *)&one);
330 		if (err < 0) {
331 			perror("ioctl s1 failed()");
332 			return errno;
333 		}
334 	}
335 
336 	/* Bind server sockets */
337 	memset(&addr, 0, sizeof(struct sockaddr_in));
338 	addr.sin_family = AF_INET;
339 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
340 
341 	addr.sin_port = htons(S1_PORT);
342 	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
343 	if (err < 0) {
344 		perror("bind s1 failed()");
345 		return errno;
346 	}
347 
348 	addr.sin_port = htons(S2_PORT);
349 	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
350 	if (err < 0) {
351 		perror("bind s2 failed()");
352 		return errno;
353 	}
354 
355 	/* Listen server sockets */
356 	addr.sin_port = htons(S1_PORT);
357 	err = listen(s1, 32);
358 	if (err < 0) {
359 		perror("listen s1 failed()");
360 		return errno;
361 	}
362 
363 	addr.sin_port = htons(S2_PORT);
364 	err = listen(s2, 32);
365 	if (err < 0) {
366 		perror("listen s1 failed()");
367 		return errno;
368 	}
369 
370 	/* Initiate Connect */
371 	addr.sin_port = htons(S1_PORT);
372 	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
373 	if (err < 0 && errno != EINPROGRESS) {
374 		perror("connect c1 failed()");
375 		return errno;
376 	}
377 
378 	addr.sin_port = htons(S2_PORT);
379 	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
380 	if (err < 0 && errno != EINPROGRESS) {
381 		perror("connect c2 failed()");
382 		return errno;
383 	} else if (err < 0) {
384 		err = 0;
385 	}
386 
387 	/* Accept Connecrtions */
388 	p1 = accept(s1, NULL, NULL);
389 	if (p1 < 0) {
390 		perror("accept s1 failed()");
391 		return errno;
392 	}
393 
394 	p2 = accept(s2, NULL, NULL);
395 	if (p2 < 0) {
396 		perror("accept s1 failed()");
397 		return errno;
398 	}
399 
400 	if (verbose > 1) {
401 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
402 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
403 			c1, s1, c2, s2);
404 	}
405 	return 0;
406 }
407 
/* Byte counters and monotonic timestamps of one send or receive loop. */
struct msg_stats {
	size_t bytes_sent;	/* sum of positive send results */
	size_t bytes_recvd;	/* sum of receive results */
	struct timespec start;	/* taken right before the loop */
	struct timespec end;	/* taken when the loop stops */
};
414 
415 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
416 			     struct msg_stats *s,
417 			     struct sockmap_options *opt)
418 {
419 	bool drop = opt->drop_expected;
420 	unsigned char k = 0;
421 	FILE *file;
422 	int i, fp;
423 
424 	file = tmpfile();
425 	if (!file) {
426 		perror("create file for sendpage");
427 		return 1;
428 	}
429 	for (i = 0; i < iov_length * cnt; i++, k++)
430 		fwrite(&k, sizeof(char), 1, file);
431 	fflush(file);
432 	fseek(file, 0, SEEK_SET);
433 
434 	fp = fileno(file);
435 
436 	clock_gettime(CLOCK_MONOTONIC, &s->start);
437 	for (i = 0; i < cnt; i++) {
438 		int sent;
439 
440 		errno = 0;
441 		sent = sendfile(fd, fp, NULL, iov_length);
442 
443 		if (!drop && sent < 0) {
444 			perror("sendpage loop error");
445 			fclose(file);
446 			return sent;
447 		} else if (drop && sent >= 0) {
448 			printf("sendpage loop error expected: %i errno %i\n",
449 			       sent, errno);
450 			fclose(file);
451 			return -EIO;
452 		}
453 
454 		if (sent > 0)
455 			s->bytes_sent += sent;
456 	}
457 	clock_gettime(CLOCK_MONOTONIC, &s->end);
458 	fclose(file);
459 	return 0;
460 }
461 
/*
 * Free every buffer referenced by @msg's iovec array, then the array
 * itself, and leave @msg with no iovecs. An empty @msg is fine.
 */
static void msg_free_iov(struct msghdr *msg)
{
	size_t idx = 0;

	while (idx < msg->msg_iovlen) {
		free(msg->msg_iov[idx].iov_base);
		idx++;
	}

	free(msg->msg_iov);
	msg->msg_iovlen = 0;
	msg->msg_iov = NULL;
}
472 
/*
 * Allocate @iov_count zeroed buffers of @iov_length bytes and attach
 * them to @msg.
 *
 * @data, @xmit: when both are set the buffers are filled with a byte
 *               counter that runs on across buffers and wraps at 256.
 *
 * Returns 0 on success. If the iovec array cannot be allocated a positive
 * errno value is returned; if a data buffer cannot be allocated -ENOMEM
 * is returned. On failure nothing is leaked and @msg is left untouched.
 */
static int msg_alloc_iov(struct msghdr *msg,
			 int iov_count, int iov_length,
			 bool data, bool xmit)
{
	unsigned char k = 0;
	struct iovec *iov;
	int i;

	iov = calloc(iov_count, sizeof(*iov));
	if (!iov)
		return errno ? errno : ENOMEM;

	for (i = 0; i < iov_count; i++) {
		unsigned char *d = calloc(iov_length, sizeof(char));

		if (!d) {
			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
			goto unwind_iov;
		}
		iov[i].iov_base = d;
		iov[i].iov_len = iov_length;

		if (data && xmit) {
			int j;

			for (j = 0; j < iov_length; j++)
				d[j] = k++;
		}
	}

	msg->msg_iov = iov;
	msg->msg_iovlen = iov_count;

	return 0;
unwind_iov:
	/*
	 * msg->msg_iov has not been assigned yet, so the buffers must be
	 * released through the local array, followed by the array itself.
	 */
	for (i--; i >= 0 ; i--)
		free(iov[i].iov_base);
	free(iov);
	return -ENOMEM;
}
512 
513 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
514 {
515 	int i, j = 0, bytes_cnt = 0;
516 	unsigned char k = 0;
517 
518 	for (i = 0; i < msg->msg_iovlen; i++) {
519 		unsigned char *d = msg->msg_iov[i].iov_base;
520 
521 		/* Special case test for skb ingress + ktls */
522 		if (i == 0 && txmsg_ktls_skb) {
523 			if (msg->msg_iov[i].iov_len < 4)
524 				return -EIO;
525 			if (memcmp(d, "PASS", 4) != 0) {
526 				fprintf(stderr,
527 					"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
528 					i, 0, d[0], d[1], d[2], d[3]);
529 				return -EIO;
530 			}
531 			j = 4; /* advance index past PASS header */
532 		}
533 
534 		for (; j < msg->msg_iov[i].iov_len && size; j++) {
535 			if (d[j] != k++) {
536 				fprintf(stderr,
537 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
538 					i, j, d[j], k - 1, d[j+1], k);
539 				return -EIO;
540 			}
541 			bytes_cnt++;
542 			if (bytes_cnt == chunk_sz) {
543 				k = 0;
544 				bytes_cnt = 0;
545 			}
546 			size--;
547 		}
548 	}
549 	return 0;
550 }
551 
552 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
553 		    struct msg_stats *s, bool tx,
554 		    struct sockmap_options *opt)
555 {
556 	struct msghdr msg = {0}, msg_peek = {0};
557 	int err, i, flags = MSG_NOSIGNAL;
558 	bool drop = opt->drop_expected;
559 	bool data = opt->data_test;
560 	int iov_alloc_length = iov_length;
561 
562 	if (!tx && opt->check_recved_len)
563 		iov_alloc_length *= 2;
564 
565 	err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
566 	if (err)
567 		goto out_errno;
568 	if (peek_flag) {
569 		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
570 		if (err)
571 			goto out_errno;
572 	}
573 
574 	if (tx) {
575 		clock_gettime(CLOCK_MONOTONIC, &s->start);
576 		for (i = 0; i < cnt; i++) {
577 			int sent;
578 
579 			errno = 0;
580 			sent = sendmsg(fd, &msg, flags);
581 
582 			if (!drop && sent < 0) {
583 				perror("sendmsg loop error");
584 				goto out_errno;
585 			} else if (drop && sent >= 0) {
586 				fprintf(stderr,
587 					"sendmsg loop error expected: %i errno %i\n",
588 					sent, errno);
589 				errno = -EIO;
590 				goto out_errno;
591 			}
592 			if (sent > 0)
593 				s->bytes_sent += sent;
594 		}
595 		clock_gettime(CLOCK_MONOTONIC, &s->end);
596 	} else {
597 		int slct, recvp = 0, recv, max_fd = fd;
598 		float total_bytes, txmsg_pop_total;
599 		int fd_flags = O_NONBLOCK;
600 		struct timeval timeout;
601 		fd_set w;
602 
603 		fcntl(fd, fd_flags);
604 		/* Account for pop bytes noting each iteration of apply will
605 		 * call msg_pop_data helper so we need to account for this
606 		 * by calculating the number of apply iterations. Note user
607 		 * of the tool can create cases where no data is sent by
608 		 * manipulating pop/push/pull/etc. For example txmsg_apply 1
609 		 * with txmsg_pop 1 will try to apply 1B at a time but each
610 		 * iteration will then pop 1B so no data will ever be sent.
611 		 * This is really only useful for testing edge cases in code
612 		 * paths.
613 		 */
614 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
615 		if (txmsg_apply)
616 			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
617 		else
618 			txmsg_pop_total = txmsg_pop * cnt;
619 		total_bytes -= txmsg_pop_total;
620 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
621 		if (err < 0)
622 			perror("recv start time");
623 		while (s->bytes_recvd < total_bytes) {
624 			if (txmsg_cork) {
625 				timeout.tv_sec = 0;
626 				timeout.tv_usec = 300000;
627 			} else {
628 				timeout.tv_sec = 3;
629 				timeout.tv_usec = 0;
630 			}
631 
632 			/* FD sets */
633 			FD_ZERO(&w);
634 			FD_SET(fd, &w);
635 
636 			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
637 			if (slct == -1) {
638 				perror("select()");
639 				clock_gettime(CLOCK_MONOTONIC, &s->end);
640 				goto out_errno;
641 			} else if (!slct) {
642 				if (opt->verbose)
643 					fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
644 				errno = -EIO;
645 				clock_gettime(CLOCK_MONOTONIC, &s->end);
646 				goto out_errno;
647 			}
648 
649 			errno = 0;
650 			if (peek_flag) {
651 				flags |= MSG_PEEK;
652 				recvp = recvmsg(fd, &msg_peek, flags);
653 				if (recvp < 0) {
654 					if (errno != EWOULDBLOCK) {
655 						clock_gettime(CLOCK_MONOTONIC, &s->end);
656 						goto out_errno;
657 					}
658 				}
659 				flags = 0;
660 			}
661 
662 			recv = recvmsg(fd, &msg, flags);
663 			if (recv < 0) {
664 				if (errno != EWOULDBLOCK) {
665 					clock_gettime(CLOCK_MONOTONIC, &s->end);
666 					perror("recv failed()");
667 					goto out_errno;
668 				}
669 			}
670 
671 			s->bytes_recvd += recv;
672 
673 			if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
674 				errno = EMSGSIZE;
675 				fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
676 						s->bytes_recvd, total_bytes);
677 				goto out_errno;
678 			}
679 
680 			if (data) {
681 				int chunk_sz = opt->sendpage ?
682 						iov_length * cnt :
683 						iov_length * iov_count;
684 
685 				errno = msg_verify_data(&msg, recv, chunk_sz);
686 				if (errno) {
687 					perror("data verify msg failed");
688 					goto out_errno;
689 				}
690 				if (recvp) {
691 					errno = msg_verify_data(&msg_peek,
692 								recvp,
693 								chunk_sz);
694 					if (errno) {
695 						perror("data verify msg_peek failed");
696 						goto out_errno;
697 					}
698 				}
699 			}
700 		}
701 		clock_gettime(CLOCK_MONOTONIC, &s->end);
702 	}
703 
704 	msg_free_iov(&msg);
705 	msg_free_iov(&msg_peek);
706 	return err;
707 out_errno:
708 	msg_free_iov(&msg);
709 	msg_free_iov(&msg_peek);
710 	return errno;
711 }
712 
/* Divisor used to print byte rates in GB/s (10^9). */
static float giga = 1e9f;
714 
715 static inline float sentBps(struct msg_stats s)
716 {
717 	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
718 }
719 
720 static inline float recvdBps(struct msg_stats s)
721 {
722 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
723 }
724 
725 static int sendmsg_test(struct sockmap_options *opt)
726 {
727 	float sent_Bps = 0, recvd_Bps = 0;
728 	int rx_fd, txpid, rxpid, err = 0;
729 	struct msg_stats s = {0};
730 	int iov_count = opt->iov_count;
731 	int iov_buf = opt->iov_length;
732 	int rx_status, tx_status;
733 	int cnt = opt->rate;
734 
735 	errno = 0;
736 
737 	if (opt->base)
738 		rx_fd = p1;
739 	else
740 		rx_fd = p2;
741 
742 	if (ktls) {
743 		/* Redirecting into non-TLS socket which sends into a TLS
744 		 * socket is not a valid test. So in this case lets not
745 		 * enable kTLS but still run the test.
746 		 */
747 		if (!txmsg_redir || txmsg_ingress) {
748 			err = sockmap_init_ktls(opt->verbose, rx_fd);
749 			if (err)
750 				return err;
751 		}
752 		err = sockmap_init_ktls(opt->verbose, c1);
753 		if (err)
754 			return err;
755 	}
756 
757 	rxpid = fork();
758 	if (rxpid == 0) {
759 		if (txmsg_pop || txmsg_start_pop)
760 			iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
761 		if (opt->drop_expected || txmsg_ktls_skb_drop)
762 			_exit(0);
763 
764 		if (!iov_buf) /* zero bytes sent case */
765 			_exit(0);
766 
767 		if (opt->sendpage)
768 			iov_count = 1;
769 		err = msg_loop(rx_fd, iov_count, iov_buf,
770 			       cnt, &s, false, opt);
771 		if (opt->verbose > 1)
772 			fprintf(stderr,
773 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
774 				iov_count, iov_buf, cnt, err);
775 		if (s.end.tv_sec - s.start.tv_sec) {
776 			sent_Bps = sentBps(s);
777 			recvd_Bps = recvdBps(s);
778 		}
779 		if (opt->verbose > 1)
780 			fprintf(stdout,
781 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
782 				s.bytes_sent, sent_Bps, sent_Bps/giga,
783 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
784 				peek_flag ? "(peek_msg)" : "");
785 		if (err && txmsg_cork)
786 			err = 0;
787 		exit(err ? 1 : 0);
788 	} else if (rxpid == -1) {
789 		perror("msg_loop_rx");
790 		return errno;
791 	}
792 
793 	txpid = fork();
794 	if (txpid == 0) {
795 		if (opt->sendpage)
796 			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
797 		else
798 			err = msg_loop(c1, iov_count, iov_buf,
799 				       cnt, &s, true, opt);
800 
801 		if (err)
802 			fprintf(stderr,
803 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
804 				iov_count, iov_buf, cnt, err);
805 		if (s.end.tv_sec - s.start.tv_sec) {
806 			sent_Bps = sentBps(s);
807 			recvd_Bps = recvdBps(s);
808 		}
809 		if (opt->verbose > 1)
810 			fprintf(stdout,
811 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
812 				s.bytes_sent, sent_Bps, sent_Bps/giga,
813 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
814 		exit(err ? 1 : 0);
815 	} else if (txpid == -1) {
816 		perror("msg_loop_tx");
817 		return errno;
818 	}
819 
820 	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
821 	assert(waitpid(txpid, &tx_status, 0) == txpid);
822 	if (WIFEXITED(rx_status)) {
823 		err = WEXITSTATUS(rx_status);
824 		if (err) {
825 			fprintf(stderr, "rx thread exited with err %d.\n", err);
826 			goto out;
827 		}
828 	}
829 	if (WIFEXITED(tx_status)) {
830 		err = WEXITSTATUS(tx_status);
831 		if (err)
832 			fprintf(stderr, "tx thread exited with err %d.\n", err);
833 	}
834 out:
835 	return err;
836 }
837 
838 static int forever_ping_pong(int rate, struct sockmap_options *opt)
839 {
840 	struct timeval timeout;
841 	char buf[1024] = {0};
842 	int sc;
843 
844 	timeout.tv_sec = 10;
845 	timeout.tv_usec = 0;
846 
847 	/* Ping/Pong data from client to server */
848 	sc = send(c1, buf, sizeof(buf), 0);
849 	if (sc < 0) {
850 		perror("send failed()");
851 		return sc;
852 	}
853 
854 	do {
855 		int s, rc, i, max_fd = p2;
856 		fd_set w;
857 
858 		/* FD sets */
859 		FD_ZERO(&w);
860 		FD_SET(c1, &w);
861 		FD_SET(c2, &w);
862 		FD_SET(p1, &w);
863 		FD_SET(p2, &w);
864 
865 		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
866 		if (s == -1) {
867 			perror("select()");
868 			break;
869 		} else if (!s) {
870 			fprintf(stderr, "unexpected timeout\n");
871 			break;
872 		}
873 
874 		for (i = 0; i <= max_fd && s > 0; ++i) {
875 			if (!FD_ISSET(i, &w))
876 				continue;
877 
878 			s--;
879 
880 			rc = recv(i, buf, sizeof(buf), 0);
881 			if (rc < 0) {
882 				if (errno != EWOULDBLOCK) {
883 					perror("recv failed()");
884 					return rc;
885 				}
886 			}
887 
888 			if (rc == 0) {
889 				close(i);
890 				break;
891 			}
892 
893 			sc = send(i, buf, rc, 0);
894 			if (sc < 0) {
895 				perror("send failed()");
896 				return sc;
897 			}
898 		}
899 
900 		if (rate)
901 			sleep(rate);
902 
903 		if (opt->verbose) {
904 			printf(".");
905 			fflush(stdout);
906 
907 		}
908 	} while (running);
909 
910 	return 0;
911 }
912 
/* Test modes understood by run_options(). */
enum {
	SELFTESTS     = 0,
	PING_PONG     = 1,	/* forever_ping_pong() */
	SENDMSG       = 2,	/* sendmsg_test(), base off, sendpage off */
	BASE          = 3,	/* sendmsg_test(), base on, BPF setup skipped */
	BASE_SENDPAGE = 4,	/* as BASE, but with sendpage on */
	SENDPAGE      = 5,	/* sendmsg_test(), base off, sendpage on */
};
921 
922 static int run_options(struct sockmap_options *options, int cg_fd,  int test)
923 {
924 	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
925 
926 	/* If base test skip BPF setup */
927 	if (test == BASE || test == BASE_SENDPAGE)
928 		goto run;
929 
930 	/* Attach programs to sockmap */
931 	if (!txmsg_omit_skb_parser) {
932 		err = bpf_prog_attach(prog_fd[0], map_fd[0],
933 				      BPF_SK_SKB_STREAM_PARSER, 0);
934 		if (err) {
935 			fprintf(stderr,
936 				"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
937 				prog_fd[0], map_fd[0], err, strerror(errno));
938 			return err;
939 		}
940 	}
941 
942 	err = bpf_prog_attach(prog_fd[1], map_fd[0],
943 				BPF_SK_SKB_STREAM_VERDICT, 0);
944 	if (err) {
945 		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
946 			err, strerror(errno));
947 		return err;
948 	}
949 
950 	/* Attach programs to TLS sockmap */
951 	if (txmsg_ktls_skb) {
952 		if (!txmsg_omit_skb_parser) {
953 			err = bpf_prog_attach(prog_fd[0], map_fd[8],
954 					      BPF_SK_SKB_STREAM_PARSER, 0);
955 			if (err) {
956 				fprintf(stderr,
957 					"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
958 					prog_fd[0], map_fd[8], err, strerror(errno));
959 				return err;
960 			}
961 		}
962 
963 		err = bpf_prog_attach(prog_fd[2], map_fd[8],
964 				      BPF_SK_SKB_STREAM_VERDICT, 0);
965 		if (err) {
966 			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
967 				err, strerror(errno));
968 			return err;
969 		}
970 	}
971 
972 	/* Attach to cgroups */
973 	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
974 	if (err) {
975 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
976 			err, strerror(errno));
977 		return err;
978 	}
979 
980 run:
981 	err = sockmap_init_sockets(options->verbose);
982 	if (err) {
983 		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
984 		goto out;
985 	}
986 
987 	/* Attach txmsg program to sockmap */
988 	if (txmsg_pass)
989 		tx_prog_fd = prog_fd[4];
990 	else if (txmsg_redir)
991 		tx_prog_fd = prog_fd[5];
992 	else if (txmsg_apply)
993 		tx_prog_fd = prog_fd[6];
994 	else if (txmsg_cork)
995 		tx_prog_fd = prog_fd[7];
996 	else if (txmsg_drop)
997 		tx_prog_fd = prog_fd[8];
998 	else
999 		tx_prog_fd = 0;
1000 
1001 	if (tx_prog_fd) {
1002 		int redir_fd, i = 0;
1003 
1004 		err = bpf_prog_attach(tx_prog_fd,
1005 				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
1006 		if (err) {
1007 			fprintf(stderr,
1008 				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
1009 				err, strerror(errno));
1010 			goto out;
1011 		}
1012 
1013 		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
1014 		if (err) {
1015 			fprintf(stderr,
1016 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1017 				err, strerror(errno));
1018 			goto out;
1019 		}
1020 
1021 		if (txmsg_redir)
1022 			redir_fd = c2;
1023 		else
1024 			redir_fd = c1;
1025 
1026 		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
1027 		if (err) {
1028 			fprintf(stderr,
1029 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1030 				err, strerror(errno));
1031 			goto out;
1032 		}
1033 
1034 		if (txmsg_apply) {
1035 			err = bpf_map_update_elem(map_fd[3],
1036 						  &i, &txmsg_apply, BPF_ANY);
1037 			if (err) {
1038 				fprintf(stderr,
1039 					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
1040 					err, strerror(errno));
1041 				goto out;
1042 			}
1043 		}
1044 
1045 		if (txmsg_cork) {
1046 			err = bpf_map_update_elem(map_fd[4],
1047 						  &i, &txmsg_cork, BPF_ANY);
1048 			if (err) {
1049 				fprintf(stderr,
1050 					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
1051 					err, strerror(errno));
1052 				goto out;
1053 			}
1054 		}
1055 
1056 		if (txmsg_start) {
1057 			err = bpf_map_update_elem(map_fd[5],
1058 						  &i, &txmsg_start, BPF_ANY);
1059 			if (err) {
1060 				fprintf(stderr,
1061 					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
1062 					err, strerror(errno));
1063 				goto out;
1064 			}
1065 		}
1066 
1067 		if (txmsg_end) {
1068 			i = 1;
1069 			err = bpf_map_update_elem(map_fd[5],
1070 						  &i, &txmsg_end, BPF_ANY);
1071 			if (err) {
1072 				fprintf(stderr,
1073 					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
1074 					err, strerror(errno));
1075 				goto out;
1076 			}
1077 		}
1078 
1079 		if (txmsg_start_push) {
1080 			i = 2;
1081 			err = bpf_map_update_elem(map_fd[5],
1082 						  &i, &txmsg_start_push, BPF_ANY);
1083 			if (err) {
1084 				fprintf(stderr,
1085 					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
1086 					err, strerror(errno));
1087 				goto out;
1088 			}
1089 		}
1090 
1091 		if (txmsg_end_push) {
1092 			i = 3;
1093 			err = bpf_map_update_elem(map_fd[5],
1094 						  &i, &txmsg_end_push, BPF_ANY);
1095 			if (err) {
1096 				fprintf(stderr,
1097 					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
1098 					txmsg_end_push, i, err, strerror(errno));
1099 				goto out;
1100 			}
1101 		}
1102 
1103 		if (txmsg_start_pop) {
1104 			i = 4;
1105 			err = bpf_map_update_elem(map_fd[5],
1106 						  &i, &txmsg_start_pop, BPF_ANY);
1107 			if (err) {
1108 				fprintf(stderr,
1109 					"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop):  %d (%s)\n",
1110 					txmsg_start_pop, i, err, strerror(errno));
1111 				goto out;
1112 			}
1113 		} else {
1114 			i = 4;
1115 			bpf_map_update_elem(map_fd[5],
1116 						  &i, &txmsg_start_pop, BPF_ANY);
1117 		}
1118 
1119 		if (txmsg_pop) {
1120 			i = 5;
1121 			err = bpf_map_update_elem(map_fd[5],
1122 						  &i, &txmsg_pop, BPF_ANY);
1123 			if (err) {
1124 				fprintf(stderr,
1125 					"ERROR: bpf_map_update_elem %i@%i (txmsg_pop):  %d (%s)\n",
1126 					txmsg_pop, i, err, strerror(errno));
1127 				goto out;
1128 			}
1129 		} else {
1130 			i = 5;
1131 			bpf_map_update_elem(map_fd[5],
1132 					    &i, &txmsg_pop, BPF_ANY);
1133 
1134 		}
1135 
1136 		if (txmsg_ingress) {
1137 			int in = BPF_F_INGRESS;
1138 
1139 			i = 0;
1140 			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
1141 			if (err) {
1142 				fprintf(stderr,
1143 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1144 					err, strerror(errno));
1145 			}
1146 			i = 1;
1147 			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
1148 			if (err) {
1149 				fprintf(stderr,
1150 					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
1151 					err, strerror(errno));
1152 			}
1153 			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
1154 			if (err) {
1155 				fprintf(stderr,
1156 					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
1157 					err, strerror(errno));
1158 			}
1159 
1160 			i = 2;
1161 			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
1162 			if (err) {
1163 				fprintf(stderr,
1164 					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
1165 					err, strerror(errno));
1166 			}
1167 		}
1168 
1169 		if (txmsg_ktls_skb) {
1170 			int ingress = BPF_F_INGRESS;
1171 
1172 			i = 0;
1173 			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
1174 			if (err) {
1175 				fprintf(stderr,
1176 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1177 					err, strerror(errno));
1178 			}
1179 
1180 			if (txmsg_ktls_skb_redir) {
1181 				i = 1;
1182 				err = bpf_map_update_elem(map_fd[7],
1183 							  &i, &ingress, BPF_ANY);
1184 				if (err) {
1185 					fprintf(stderr,
1186 						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1187 						err, strerror(errno));
1188 				}
1189 			}
1190 
1191 			if (txmsg_ktls_skb_drop) {
1192 				i = 1;
1193 				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
1194 			}
1195 		}
1196 
1197 		if (txmsg_redir_skb) {
1198 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
1199 					p2 : p1;
1200 			int ingress = BPF_F_INGRESS;
1201 
1202 			i = 0;
1203 			err = bpf_map_update_elem(map_fd[7],
1204 						  &i, &ingress, BPF_ANY);
1205 			if (err) {
1206 				fprintf(stderr,
1207 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1208 					err, strerror(errno));
1209 			}
1210 
1211 			i = 3;
1212 			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
1213 			if (err) {
1214 				fprintf(stderr,
1215 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1216 					err, strerror(errno));
1217 			}
1218 		}
1219 	}
1220 
1221 	if (skb_use_parser) {
1222 		i = 2;
1223 		err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
1224 	}
1225 
1226 	if (txmsg_drop)
1227 		options->drop_expected = true;
1228 
1229 	if (test == PING_PONG)
1230 		err = forever_ping_pong(options->rate, options);
1231 	else if (test == SENDMSG) {
1232 		options->base = false;
1233 		options->sendpage = false;
1234 		err = sendmsg_test(options);
1235 	} else if (test == SENDPAGE) {
1236 		options->base = false;
1237 		options->sendpage = true;
1238 		err = sendmsg_test(options);
1239 	} else if (test == BASE) {
1240 		options->base = true;
1241 		options->sendpage = false;
1242 		err = sendmsg_test(options);
1243 	} else if (test == BASE_SENDPAGE) {
1244 		options->base = true;
1245 		options->sendpage = true;
1246 		err = sendmsg_test(options);
1247 	} else
1248 		fprintf(stderr, "unknown test\n");
1249 out:
1250 	/* Detatch and zero all the maps */
1251 	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
1252 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
1253 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
1254 	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
1255 	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
1256 
1257 	if (tx_prog_fd >= 0)
1258 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
1259 
1260 	for (i = 0; i < 8; i++) {
1261 		key = next_key = 0;
1262 		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1263 		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
1264 			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1265 			key = next_key;
1266 		}
1267 	}
1268 
1269 	close(s1);
1270 	close(s2);
1271 	close(p1);
1272 	close(p2);
1273 	close(c1);
1274 	close(c2);
1275 	return err;
1276 }
1277 
1278 static char *test_to_str(int test)
1279 {
1280 	switch (test) {
1281 	case SENDMSG:
1282 		return "sendmsg";
1283 	case SENDPAGE:
1284 		return "sendpage";
1285 	}
1286 	return "unknown";
1287 }
1288 
/* Append src to the NUL-terminated string in dst without letting the result
 * (terminator included) exceed dst_cap bytes. The appended text is silently
 * truncated when it does not fit.
 *
 * @dst:     destination buffer holding a NUL-terminated string
 * @src:     NUL-terminated string to append
 * @dst_cap: total capacity of dst in bytes
 */
static void append_str(char *dst, const char *src, size_t dst_cap)
{
	size_t used = strlen(dst);

	/* Compare before subtracting: dst_cap - used is unsigned and would
	 * wrap to a huge value if the string already exceeds the capacity,
	 * turning the strncat() below into an unbounded write.
	 */
	if (used >= dst_cap || dst_cap - used <= 1)
		return; /* at most the terminating zero byte would fit */

	/* strncat() writes up to n bytes plus a terminating zero byte */
	strncat(dst, src, dst_cap - used - 1);
}
1298 
1299 #define OPTSTRING 60
1300 static void test_options(char *options)
1301 {
1302 	char tstr[OPTSTRING];
1303 
1304 	memset(options, 0, OPTSTRING);
1305 
1306 	if (txmsg_pass)
1307 		append_str(options, "pass,", OPTSTRING);
1308 	if (txmsg_redir)
1309 		append_str(options, "redir,", OPTSTRING);
1310 	if (txmsg_drop)
1311 		append_str(options, "drop,", OPTSTRING);
1312 	if (txmsg_apply) {
1313 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
1314 		append_str(options, tstr, OPTSTRING);
1315 	}
1316 	if (txmsg_cork) {
1317 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
1318 		append_str(options, tstr, OPTSTRING);
1319 	}
1320 	if (txmsg_start) {
1321 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
1322 		append_str(options, tstr, OPTSTRING);
1323 	}
1324 	if (txmsg_end) {
1325 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
1326 		append_str(options, tstr, OPTSTRING);
1327 	}
1328 	if (txmsg_start_pop) {
1329 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
1330 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
1331 		append_str(options, tstr, OPTSTRING);
1332 	}
1333 	if (txmsg_ingress)
1334 		append_str(options, "ingress,", OPTSTRING);
1335 	if (txmsg_redir_skb)
1336 		append_str(options, "redir_skb,", OPTSTRING);
1337 	if (txmsg_ktls_skb)
1338 		append_str(options, "ktls_skb,", OPTSTRING);
1339 	if (ktls)
1340 		append_str(options, "ktls,", OPTSTRING);
1341 	if (peek_flag)
1342 		append_str(options, "peek,", OPTSTRING);
1343 }
1344 
1345 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
1346 {
1347 	char *options = calloc(OPTSTRING, sizeof(char));
1348 	int err;
1349 
1350 	if (test == SENDPAGE)
1351 		opt->sendpage = true;
1352 	else
1353 		opt->sendpage = false;
1354 
1355 	if (txmsg_drop)
1356 		opt->drop_expected = true;
1357 	else
1358 		opt->drop_expected = false;
1359 
1360 	test_options(options);
1361 
1362 	if (opt->verbose) {
1363 		fprintf(stdout,
1364 			" [TEST %i]: (%i, %i, %i, %s, %s): ",
1365 			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
1366 			test_to_str(test), options);
1367 		fflush(stdout);
1368 	}
1369 	err = run_options(opt, cgrp, test);
1370 	if (opt->verbose)
1371 		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
1372 	test_cnt++;
1373 	!err ? passed++ : failed++;
1374 	free(options);
1375 	return err;
1376 }
1377 
1378 static void test_exec(int cgrp, struct sockmap_options *opt)
1379 {
1380 	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
1381 	int err;
1382 
1383 	if (type == 0) {
1384 		test_start();
1385 		err = __test_exec(cgrp, SENDMSG, opt);
1386 		if (err)
1387 			test_fail();
1388 	} else {
1389 		test_start();
1390 		err = __test_exec(cgrp, SENDPAGE, opt);
1391 		if (err)
1392 			test_fail();
1393 	}
1394 }
1395 
1396 static void test_send_one(struct sockmap_options *opt, int cgrp)
1397 {
1398 	opt->iov_length = 1;
1399 	opt->iov_count = 1;
1400 	opt->rate = 1;
1401 	test_exec(cgrp, opt);
1402 
1403 	opt->iov_length = 1;
1404 	opt->iov_count = 1024;
1405 	opt->rate = 1;
1406 	test_exec(cgrp, opt);
1407 
1408 	opt->iov_length = 1024;
1409 	opt->iov_count = 1;
1410 	opt->rate = 1;
1411 	test_exec(cgrp, opt);
1412 
1413 }
1414 
1415 static void test_send_many(struct sockmap_options *opt, int cgrp)
1416 {
1417 	opt->iov_length = 3;
1418 	opt->iov_count = 1;
1419 	opt->rate = 512;
1420 	test_exec(cgrp, opt);
1421 
1422 	opt->rate = 100;
1423 	opt->iov_count = 1;
1424 	opt->iov_length = 5;
1425 	test_exec(cgrp, opt);
1426 }
1427 
1428 static void test_send_large(struct sockmap_options *opt, int cgrp)
1429 {
1430 	opt->iov_length = 256;
1431 	opt->iov_count = 1024;
1432 	opt->rate = 2;
1433 	test_exec(cgrp, opt);
1434 }
1435 
/* Run the full send matrix (single, many, large) with the current globals,
 * then yield the CPU.
 */
static void test_send(struct sockmap_options *opt, int cgrp)
{
	test_send_one(opt, cgrp);
	test_send_many(opt, cgrp);
	test_send_large(opt, cgrp);

	sched_yield();
}
1443 
1444 static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
1445 {
1446 	/* Test small and large iov_count values with pass/redir/apply/cork */
1447 	txmsg_pass = 1;
1448 	test_send(opt, cgrp);
1449 }
1450 
1451 static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
1452 {
1453 	txmsg_redir = 1;
1454 	test_send(opt, cgrp);
1455 }
1456 
1457 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
1458 {
1459 	txmsg_drop = 1;
1460 	test_send(opt, cgrp);
1461 }
1462 
1463 static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
1464 {
1465 	txmsg_pass = txmsg_drop = 0;
1466 	txmsg_ingress = txmsg_redir = 1;
1467 	test_send(opt, cgrp);
1468 }
1469 
1470 static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
1471 {
1472 	bool data = opt->data_test;
1473 	int k = ktls;
1474 
1475 	opt->data_test = true;
1476 	ktls = 1;
1477 
1478 	txmsg_pass = txmsg_drop = 0;
1479 	txmsg_ingress = txmsg_redir = 0;
1480 	txmsg_ktls_skb = 1;
1481 	txmsg_pass = 1;
1482 
1483 	/* Using data verification so ensure iov layout is
1484 	 * expected from test receiver side. e.g. has enough
1485 	 * bytes to write test code.
1486 	 */
1487 	opt->iov_length = 100;
1488 	opt->iov_count = 1;
1489 	opt->rate = 1;
1490 	test_exec(cgrp, opt);
1491 
1492 	txmsg_ktls_skb_drop = 1;
1493 	test_exec(cgrp, opt);
1494 
1495 	txmsg_ktls_skb_drop = 0;
1496 	txmsg_ktls_skb_redir = 1;
1497 	test_exec(cgrp, opt);
1498 	txmsg_ktls_skb_redir = 0;
1499 
1500 	/* Tests that omit skb_parser */
1501 	txmsg_omit_skb_parser = 1;
1502 	ktls = 0;
1503 	txmsg_ktls_skb = 0;
1504 	test_exec(cgrp, opt);
1505 
1506 	txmsg_ktls_skb_drop = 1;
1507 	test_exec(cgrp, opt);
1508 	txmsg_ktls_skb_drop = 0;
1509 
1510 	txmsg_ktls_skb_redir = 1;
1511 	test_exec(cgrp, opt);
1512 
1513 	ktls = 1;
1514 	test_exec(cgrp, opt);
1515 	txmsg_omit_skb_parser = 0;
1516 
1517 	opt->data_test = data;
1518 	ktls = k;
1519 }
1520 
1521 /* Test cork with hung data. This tests poor usage patterns where
1522  * cork can leave data on the ring if user program is buggy and
1523  * doesn't flush them somehow. They do take some time however
1524  * because they wait for a timeout. Test pass, redir and cork with
1525  * apply logic. Use cork size of 4097 with send_large to avoid
1526  * aligning cork size with send size.
1527  */
1528 static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
1529 {
1530 	txmsg_pass = 1;
1531 	txmsg_redir = 0;
1532 	txmsg_cork = 4097;
1533 	txmsg_apply = 4097;
1534 	test_send_large(opt, cgrp);
1535 
1536 	txmsg_pass = 0;
1537 	txmsg_redir = 1;
1538 	txmsg_apply = 0;
1539 	txmsg_cork = 4097;
1540 	test_send_large(opt, cgrp);
1541 
1542 	txmsg_pass = 0;
1543 	txmsg_redir = 1;
1544 	txmsg_apply = 4097;
1545 	txmsg_cork = 4097;
1546 	test_send_large(opt, cgrp);
1547 }
1548 
1549 static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
1550 {
1551 	/* Test basic start/end */
1552 	txmsg_start = 1;
1553 	txmsg_end = 2;
1554 	test_send(opt, cgrp);
1555 
1556 	/* Test >4k pull */
1557 	txmsg_start = 4096;
1558 	txmsg_end = 9182;
1559 	test_send_large(opt, cgrp);
1560 
1561 	/* Test pull + redirect */
1562 	txmsg_redir = 0;
1563 	txmsg_start = 1;
1564 	txmsg_end = 2;
1565 	test_send(opt, cgrp);
1566 
1567 	/* Test pull + cork */
1568 	txmsg_redir = 0;
1569 	txmsg_cork = 512;
1570 	txmsg_start = 1;
1571 	txmsg_end = 2;
1572 	test_send_many(opt, cgrp);
1573 
1574 	/* Test pull + cork + redirect */
1575 	txmsg_redir = 1;
1576 	txmsg_cork = 512;
1577 	txmsg_start = 1;
1578 	txmsg_end = 2;
1579 	test_send_many(opt, cgrp);
1580 }
1581 
1582 static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
1583 {
1584 	/* Test basic pop */
1585 	txmsg_start_pop = 1;
1586 	txmsg_pop = 2;
1587 	test_send_many(opt, cgrp);
1588 
1589 	/* Test pop with >4k */
1590 	txmsg_start_pop = 4096;
1591 	txmsg_pop = 4096;
1592 	test_send_large(opt, cgrp);
1593 
1594 	/* Test pop + redirect */
1595 	txmsg_redir = 1;
1596 	txmsg_start_pop = 1;
1597 	txmsg_pop = 2;
1598 	test_send_many(opt, cgrp);
1599 
1600 	/* Test pop + cork */
1601 	txmsg_redir = 0;
1602 	txmsg_cork = 512;
1603 	txmsg_start_pop = 1;
1604 	txmsg_pop = 2;
1605 	test_send_many(opt, cgrp);
1606 
1607 	/* Test pop + redirect + cork */
1608 	txmsg_redir = 1;
1609 	txmsg_cork = 4;
1610 	txmsg_start_pop = 1;
1611 	txmsg_pop = 2;
1612 	test_send_many(opt, cgrp);
1613 }
1614 
1615 static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
1616 {
1617 	/* Test basic push */
1618 	txmsg_start_push = 1;
1619 	txmsg_end_push = 1;
1620 	test_send(opt, cgrp);
1621 
1622 	/* Test push 4kB >4k */
1623 	txmsg_start_push = 4096;
1624 	txmsg_end_push = 4096;
1625 	test_send_large(opt, cgrp);
1626 
1627 	/* Test push + redirect */
1628 	txmsg_redir = 1;
1629 	txmsg_start_push = 1;
1630 	txmsg_end_push = 2;
1631 	test_send_many(opt, cgrp);
1632 
1633 	/* Test push + cork */
1634 	txmsg_redir = 0;
1635 	txmsg_cork = 512;
1636 	txmsg_start_push = 1;
1637 	txmsg_end_push = 2;
1638 	test_send_many(opt, cgrp);
1639 }
1640 
1641 static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
1642 {
1643 	txmsg_start_push = 1;
1644 	txmsg_end_push = 10;
1645 	txmsg_start_pop = 5;
1646 	txmsg_pop = 4;
1647 	test_send_large(opt, cgrp);
1648 }
1649 
1650 static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
1651 {
1652 	txmsg_pass = 1;
1653 	txmsg_redir = 0;
1654 	txmsg_apply = 1;
1655 	txmsg_cork = 0;
1656 	test_send_one(opt, cgrp);
1657 
1658 	txmsg_pass = 0;
1659 	txmsg_redir = 1;
1660 	txmsg_apply = 1;
1661 	txmsg_cork = 0;
1662 	test_send_one(opt, cgrp);
1663 
1664 	txmsg_pass = 1;
1665 	txmsg_redir = 0;
1666 	txmsg_apply = 1024;
1667 	txmsg_cork = 0;
1668 	test_send_large(opt, cgrp);
1669 
1670 	txmsg_pass = 0;
1671 	txmsg_redir = 1;
1672 	txmsg_apply = 1024;
1673 	txmsg_cork = 0;
1674 	test_send_large(opt, cgrp);
1675 }
1676 
1677 static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
1678 {
1679 	txmsg_pass = 1;
1680 	txmsg_redir = 0;
1681 	txmsg_apply = 0;
1682 	txmsg_cork = 1;
1683 	test_send(opt, cgrp);
1684 
1685 	txmsg_pass = 1;
1686 	txmsg_redir = 0;
1687 	txmsg_apply = 1;
1688 	txmsg_cork = 1;
1689 	test_send(opt, cgrp);
1690 }
1691 
1692 static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
1693 {
1694 	txmsg_pass = 1;
1695 	skb_use_parser = 512;
1696 	if (ktls == 1)
1697 		skb_use_parser = 570;
1698 	opt->iov_length = 256;
1699 	opt->iov_count = 1;
1700 	opt->rate = 2;
1701 	test_exec(cgrp, opt);
1702 }
1703 
1704 static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
1705 {
1706 	if (ktls == 1)
1707 		return;
1708 	skb_use_parser = 10;
1709 	opt->iov_length = 20;
1710 	opt->iov_count = 1;
1711 	opt->rate = 1;
1712 	opt->check_recved_len = true;
1713 	test_exec(cgrp, opt);
1714 	opt->check_recved_len = false;
1715 }
1716 
/* Names of the maps looked up in the loaded BPF object. populate_progs()
 * stores the fd of map_names[i] in map_fd[i], so the order of this table
 * defines the map_fd[] indices used throughout the file.
 */
char *map_names[] = {
	"sock_map",
	"sock_map_txmsg",
	"sock_map_redir",
	"sock_apply_bytes",
	"sock_cork_bytes",
	"sock_bytes",
	"sock_redir_flags",
	"sock_skb_opts",
	"tls_sock_map",
};
1728 
/* Expected attach type per program. populate_progs() applies entry i to
 * the i-th program it gets when iterating over the BPF object, so this
 * table must stay in step with prog_type[].
 */
int prog_attach_type[] = {
	BPF_SK_SKB_STREAM_PARSER,
	BPF_SK_SKB_STREAM_VERDICT,
	BPF_SK_SKB_STREAM_VERDICT,
	BPF_CGROUP_SOCK_OPS,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
};
1742 
/* Program type per program. populate_progs() applies entry i to the i-th
 * program it gets when iterating over the BPF object, before the object
 * is loaded.
 */
int prog_type[] = {
	BPF_PROG_TYPE_SK_SKB,
	BPF_PROG_TYPE_SK_SKB,
	BPF_PROG_TYPE_SK_SKB,
	BPF_PROG_TYPE_SOCK_OPS,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
};
1756 
1757 static int populate_progs(char *bpf_file)
1758 {
1759 	struct bpf_program *prog;
1760 	struct bpf_object *obj;
1761 	int i = 0;
1762 	long err;
1763 
1764 	obj = bpf_object__open(bpf_file);
1765 	err = libbpf_get_error(obj);
1766 	if (err) {
1767 		char err_buf[256];
1768 
1769 		libbpf_strerror(err, err_buf, sizeof(err_buf));
1770 		printf("Unable to load eBPF objects in file '%s' : %s\n",
1771 		       bpf_file, err_buf);
1772 		return -1;
1773 	}
1774 
1775 	bpf_object__for_each_program(prog, obj) {
1776 		bpf_program__set_type(prog, prog_type[i]);
1777 		bpf_program__set_expected_attach_type(prog,
1778 						      prog_attach_type[i]);
1779 		i++;
1780 	}
1781 
1782 	i = bpf_object__load(obj);
1783 	i = 0;
1784 	bpf_object__for_each_program(prog, obj) {
1785 		prog_fd[i] = bpf_program__fd(prog);
1786 		i++;
1787 	}
1788 
1789 	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
1790 		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1791 		map_fd[i] = bpf_map__fd(maps[i]);
1792 		if (map_fd[i] < 0) {
1793 			fprintf(stderr, "load_bpf_file: (%i) %s\n",
1794 				map_fd[i], strerror(errno));
1795 			return -1;
1796 		}
1797 	}
1798 
1799 	return 0;
1800 }
1801 
/* Selftest table: a title and the function that runs that group of tests.
 * __test_selftests() walks it in order; titles are also what whitelist and
 * blacklist entries are substring-matched against.
 */
struct _test test[] = {
	{"txmsg test passthrough", test_txmsg_pass},
	{"txmsg test redirect", test_txmsg_redir},
	{"txmsg test drop", test_txmsg_drop},
	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
	{"txmsg test skb", test_txmsg_skb},
	{"txmsg test apply", test_txmsg_apply},
	{"txmsg test cork", test_txmsg_cork},
	{"txmsg test hanging corks", test_txmsg_cork_hangs},
	{"txmsg test push_data", test_txmsg_push},
	{"txmsg test pull-data", test_txmsg_pull},
	{"txmsg test pop-data", test_txmsg_pop},
	{"txmsg test push/pop data", test_txmsg_push_pop},
	{"txmsg test ingress parser", test_txmsg_ingress_parser},
	{"txmsg test ingress parser2", test_txmsg_ingress_parser2},
};
1818 
1819 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
1820 {
1821 	char *entry, *ptr;
1822 
1823 	if (!opt->whitelist)
1824 		return 0;
1825 	ptr = strdup(opt->whitelist);
1826 	if (!ptr)
1827 		return -ENOMEM;
1828 	entry = strtok(ptr, ",");
1829 	while (entry) {
1830 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1831 		    strstr(opt->map, entry) != 0 ||
1832 		    strstr(t->title, entry) != 0)
1833 			return 0;
1834 		entry = strtok(NULL, ",");
1835 	}
1836 	return -EINVAL;
1837 }
1838 
1839 static int check_blacklist(struct _test *t, struct sockmap_options *opt)
1840 {
1841 	char *entry, *ptr;
1842 
1843 	if (!opt->blacklist)
1844 		return -EINVAL;
1845 	ptr = strdup(opt->blacklist);
1846 	if (!ptr)
1847 		return -ENOMEM;
1848 	entry = strtok(ptr, ",");
1849 	while (entry) {
1850 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1851 		    strstr(opt->map, entry) != 0 ||
1852 		    strstr(t->title, entry) != 0)
1853 			return 0;
1854 		entry = strtok(NULL, ",");
1855 	}
1856 	return -EINVAL;
1857 }
1858 
1859 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
1860 {
1861 	int i, err;
1862 
1863 	err = populate_progs(opt->map);
1864 	if (err < 0) {
1865 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1866 		return err;
1867 	}
1868 
1869 	/* Tests basic commands and APIs */
1870 	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
1871 		struct _test t = test[i];
1872 
1873 		if (check_whitelist(&t, opt) != 0)
1874 			continue;
1875 		if (check_blacklist(&t, opt) == 0)
1876 			continue;
1877 
1878 		test_start_subtest(&t, opt);
1879 		t.tester(cg_fd, opt);
1880 		test_end_subtest();
1881 	}
1882 
1883 	return err;
1884 }
1885 
1886 static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
1887 {
1888 	opt->map = BPF_SOCKMAP_FILENAME;
1889 	__test_selftests(cg_fd, opt);
1890 }
1891 
1892 static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
1893 {
1894 	opt->map = BPF_SOCKHASH_FILENAME;
1895 	__test_selftests(cg_fd, opt);
1896 }
1897 
1898 static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
1899 {
1900 	opt->map = BPF_SOCKHASH_FILENAME;
1901 	opt->prepend = "ktls";
1902 	ktls = 1;
1903 	__test_selftests(cg_fd, opt);
1904 	ktls = 0;
1905 }
1906 
/* Run all three selftest passes (sockmap, sockhash, ktls) and print the
 * accumulated results. Always returns 0.
 */
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
	test_selftests_sockmap(cg_fd, opt);
	test_selftests_sockhash(cg_fd, opt);
	test_selftests_ktls(cg_fd, opt);

	test_print_results();

	return 0;
}
1916 
1917 int main(int argc, char **argv)
1918 {
1919 	int iov_count = 1, length = 1024, rate = 1;
1920 	struct sockmap_options options = {0};
1921 	int opt, longindex, err, cg_fd = 0;
1922 	char *bpf_file = BPF_SOCKMAP_FILENAME;
1923 	int test = SELFTESTS;
1924 	bool cg_created = 0;
1925 
1926 	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
1927 				  long_options, &longindex)) != -1) {
1928 		switch (opt) {
1929 		case 's':
1930 			txmsg_start = atoi(optarg);
1931 			break;
1932 		case 'e':
1933 			txmsg_end = atoi(optarg);
1934 			break;
1935 		case 'p':
1936 			txmsg_start_push = atoi(optarg);
1937 			break;
1938 		case 'q':
1939 			txmsg_end_push = atoi(optarg);
1940 			break;
1941 		case 'w':
1942 			txmsg_start_pop = atoi(optarg);
1943 			break;
1944 		case 'x':
1945 			txmsg_pop = atoi(optarg);
1946 			break;
1947 		case 'a':
1948 			txmsg_apply = atoi(optarg);
1949 			break;
1950 		case 'k':
1951 			txmsg_cork = atoi(optarg);
1952 			break;
1953 		case 'c':
1954 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
1955 			if (cg_fd < 0) {
1956 				fprintf(stderr,
1957 					"ERROR: (%i) open cg path failed: %s\n",
1958 					cg_fd, optarg);
1959 				return cg_fd;
1960 			}
1961 			break;
1962 		case 'r':
1963 			rate = atoi(optarg);
1964 			break;
1965 		case 'v':
1966 			options.verbose = 1;
1967 			if (optarg)
1968 				options.verbose = atoi(optarg);
1969 			break;
1970 		case 'i':
1971 			iov_count = atoi(optarg);
1972 			break;
1973 		case 'l':
1974 			length = atoi(optarg);
1975 			break;
1976 		case 'd':
1977 			options.data_test = true;
1978 			break;
1979 		case 't':
1980 			if (strcmp(optarg, "ping") == 0) {
1981 				test = PING_PONG;
1982 			} else if (strcmp(optarg, "sendmsg") == 0) {
1983 				test = SENDMSG;
1984 			} else if (strcmp(optarg, "base") == 0) {
1985 				test = BASE;
1986 			} else if (strcmp(optarg, "base_sendpage") == 0) {
1987 				test = BASE_SENDPAGE;
1988 			} else if (strcmp(optarg, "sendpage") == 0) {
1989 				test = SENDPAGE;
1990 			} else {
1991 				usage(argv);
1992 				return -1;
1993 			}
1994 			break;
1995 		case 'n':
1996 			options.whitelist = strdup(optarg);
1997 			if (!options.whitelist)
1998 				return -ENOMEM;
1999 			break;
2000 		case 'b':
2001 			options.blacklist = strdup(optarg);
2002 			if (!options.blacklist)
2003 				return -ENOMEM;
2004 		case 0:
2005 			break;
2006 		case 'h':
2007 		default:
2008 			usage(argv);
2009 			return -1;
2010 		}
2011 	}
2012 
2013 	if (!cg_fd) {
2014 		cg_fd = cgroup_setup_and_join(CG_PATH);
2015 		if (cg_fd < 0)
2016 			return cg_fd;
2017 		cg_created = 1;
2018 	}
2019 
2020 	if (test == SELFTESTS) {
2021 		err = test_selftest(cg_fd, &options);
2022 		goto out;
2023 	}
2024 
2025 	err = populate_progs(bpf_file);
2026 	if (err) {
2027 		fprintf(stderr, "populate program: (%s) %s\n",
2028 			bpf_file, strerror(errno));
2029 		return 1;
2030 	}
2031 	running = 1;
2032 
2033 	/* catch SIGINT */
2034 	signal(SIGINT, running_handler);
2035 
2036 	options.iov_count = iov_count;
2037 	options.iov_length = length;
2038 	options.rate = rate;
2039 
2040 	err = run_options(&options, cg_fd, test);
2041 out:
2042 	if (options.whitelist)
2043 		free(options.whitelist);
2044 	if (options.blacklist)
2045 		free(options.blacklist);
2046 	if (cg_created)
2047 		cleanup_cgroup_environment();
2048 	close(cg_fd);
2049 	return err;
2050 }
2051 
2052 void running_handler(int a)
2053 {
2054 	running = 0;
2055 }
2056