1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/socket.h>
6 #include <sys/ioctl.h>
7 #include <sys/select.h>
8 #include <netinet/in.h>
9 #include <arpa/inet.h>
10 #include <unistd.h>
11 #include <string.h>
12 #include <errno.h>
13 #include <stdbool.h>
14 #include <signal.h>
15 #include <fcntl.h>
16 #include <sys/wait.h>
17 #include <time.h>
18 #include <sched.h>
19 
20 #include <sys/time.h>
21 #include <sys/resource.h>
22 #include <sys/types.h>
23 #include <sys/sendfile.h>
24 
25 #include <linux/netlink.h>
26 #include <linux/socket.h>
27 #include <linux/sock_diag.h>
28 #include <linux/bpf.h>
29 #include <linux/if_link.h>
30 #include <linux/tls.h>
31 #include <assert.h>
32 #include <libgen.h>
33 
34 #include <getopt.h>
35 
36 #include <bpf/bpf.h>
37 #include <bpf/libbpf.h>
38 
39 #include "bpf_util.h"
40 #include "bpf_rlimit.h"
41 #include "cgroup_helpers.h"
42 
/* Handler prototype; its definition lies outside this part of the file. */
static void running_handler(int signo);

/* forever_ping_pong() keeps looping for as long as this is non-zero. */
int running;
45 
/* Fallbacks, used only when the included headers do not define them. */
#if !defined(TCP_ULP)
#define TCP_ULP 31
#endif
#if !defined(SOL_TLS)
#define SOL_TLS 282
#endif

/* Ports the two listening sockets bind to on loopback. */
#define S1_PORT 10000
#define S2_PORT 10001

/* Object file names and the cgroup path used by the tests. */
#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
#define CG_PATH "/sockmap"
60 
/* Sockets shared by all tests; sockmap_init_sockets() fills them in:
 * s1/s2 listen, c1/c2 connect to them and p1/p2 are the accepted ends.
 */
int s1, s2;
int c1, c2;
int p1, p2;

/* Result counters. */
int test_cnt, passed, failed;

/* BPF map and program descriptors, addressed by fixed index. */
int map_fd[9];
struct bpf_map *maps[9];
int prog_fd[11];

/* Test knobs. Several are wired to command-line flags in long_options;
 * test_reset() clears all of them except ktls and peek_flag.
 */
int txmsg_pass, txmsg_redir, txmsg_drop;
int txmsg_apply, txmsg_cork;
int txmsg_start, txmsg_end;
int txmsg_start_push, txmsg_end_push;
int txmsg_start_pop, txmsg_pop;
int txmsg_ingress, txmsg_redir_skb;
int txmsg_ktls_skb, txmsg_ktls_skb_drop, txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
int skb_use_parser;
int txmsg_omit_skb_parser;
90 
91 static const struct option long_options[] = {
92 	{"help",	no_argument,		NULL, 'h' },
93 	{"cgroup",	required_argument,	NULL, 'c' },
94 	{"rate",	required_argument,	NULL, 'r' },
95 	{"verbose",	optional_argument,	NULL, 'v' },
96 	{"iov_count",	required_argument,	NULL, 'i' },
97 	{"length",	required_argument,	NULL, 'l' },
98 	{"test",	required_argument,	NULL, 't' },
99 	{"data_test",   no_argument,		NULL, 'd' },
100 	{"txmsg",		no_argument,	&txmsg_pass,  1  },
101 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
102 	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
103 	{"txmsg_apply",	required_argument,	NULL, 'a'},
104 	{"txmsg_cork",	required_argument,	NULL, 'k'},
105 	{"txmsg_start", required_argument,	NULL, 's'},
106 	{"txmsg_end",	required_argument,	NULL, 'e'},
107 	{"txmsg_start_push", required_argument,	NULL, 'p'},
108 	{"txmsg_end_push",   required_argument,	NULL, 'q'},
109 	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
110 	{"txmsg_pop",	     required_argument,	NULL, 'x'},
111 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
112 	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
113 	{"ktls", no_argument,			&ktls, 1 },
114 	{"peek", no_argument,			&peek_flag, 1 },
115 	{"txmsg_omit_skb_parser", no_argument,      &txmsg_omit_skb_parser, 1},
116 	{"whitelist", required_argument,	NULL, 'n' },
117 	{"blacklist", required_argument,	NULL, 'b' },
118 	{0, 0, NULL, 0 }
119 };
120 
/* Bookkeeping for the subtest in progress plus the running tallies. */
struct test_env {
	const char *type;	/* map object name, copied from sockmap_options.map */
	const char *subtest;	/* title of the current subtest */
	const char *prepend;	/* optional label printed before the title */

	int test_num, subtest_num;

	int succ_cnt, fail_cnt;
	int fail_last;		/* fail_cnt as it was when the subtest began */
};

/* The single instance every reporting helper works on. */
struct test_env env;
135 
/* Per-run settings handed to the test drivers. */
struct sockmap_options {
	int verbose;		/* > 0 enables progress output, > 1 adds detail */
	bool base;		/* receive on p1 instead of p2 (see sendmsg_test) */
	bool sendpage;		/* transmit with sendfile() rather than sendmsg() */
	bool data_test;		/* fill and verify payload bytes */
	bool drop_expected;	/* sends are expected to fail */
	int iov_count;
	int iov_length;
	int rate;		/* iteration count for sendmsg tests, sleep for ping-pong */
	char *map;
	char *whitelist;
	char *blacklist;
	char *prepend;
};
150 
/* One table entry: a printable title and the function that runs the test. */
struct _test {
	char *title;
	void (*tester)(int cgroup_fd, struct sockmap_options *options);
};
155 
156 static void test_start(void)
157 {
158 	env.subtest_num++;
159 }
160 
161 static void test_fail(void)
162 {
163 	env.fail_cnt++;
164 }
165 
166 static void test_pass(void)
167 {
168 	env.succ_cnt++;
169 }
170 
171 static void test_reset(void)
172 {
173 	txmsg_start = txmsg_end = 0;
174 	txmsg_start_pop = txmsg_pop = 0;
175 	txmsg_start_push = txmsg_end_push = 0;
176 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
177 	txmsg_apply = txmsg_cork = 0;
178 	txmsg_ingress = txmsg_redir_skb = 0;
179 	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
180 	txmsg_omit_skb_parser = 0;
181 	skb_use_parser = 0;
182 }
183 
184 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
185 {
186 	env.type = o->map;
187 	env.subtest = t->title;
188 	env.prepend = o->prepend;
189 	env.test_num++;
190 	env.subtest_num = 0;
191 	env.fail_last = env.fail_cnt;
192 	test_reset();
193 	return 0;
194 }
195 
196 static void test_end_subtest(void)
197 {
198 	int error = env.fail_cnt - env.fail_last;
199 	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
200 
201 	if (!error)
202 		test_pass();
203 
204 	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
205 		env.test_num, env.subtest_num,
206 		!type ? "sockmap" : "sockhash",
207 		env.prepend ? : "",
208 		env.subtest, error ? "FAIL" : "OK");
209 }
210 
211 static void test_print_results(void)
212 {
213 	fprintf(stdout, "Pass: %d Fail: %d\n",
214 		env.succ_cnt, env.fail_cnt);
215 }
216 
217 static void usage(char *argv[])
218 {
219 	int i;
220 
221 	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
222 	printf(" options:\n");
223 	for (i = 0; long_options[i].name != 0; i++) {
224 		printf(" --%-12s", long_options[i].name);
225 		if (long_options[i].flag != NULL)
226 			printf(" flag (internal value:%d)\n",
227 				*long_options[i].flag);
228 		else
229 			printf(" -%c\n", long_options[i].val);
230 	}
231 	printf("\n");
232 }
233 
234 char *sock_to_string(int s)
235 {
236 	if (s == c1)
237 		return "client1";
238 	else if (s == c2)
239 		return "client2";
240 	else if (s == s1)
241 		return "server1";
242 	else if (s == s2)
243 		return "server2";
244 	else if (s == p1)
245 		return "peer1";
246 	else if (s == p2)
247 		return "peer2";
248 	else
249 		return "unknown";
250 }
251 
252 static int sockmap_init_ktls(int verbose, int s)
253 {
254 	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
255 		.info = {
256 			.version     = TLS_1_2_VERSION,
257 			.cipher_type = TLS_CIPHER_AES_GCM_128,
258 		},
259 	};
260 	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
261 		.info = {
262 			.version     = TLS_1_2_VERSION,
263 			.cipher_type = TLS_CIPHER_AES_GCM_128,
264 		},
265 	};
266 	int so_buf = 6553500;
267 	int err;
268 
269 	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
270 	if (err) {
271 		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
272 		return -EINVAL;
273 	}
274 	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
275 	if (err) {
276 		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
277 		return -EINVAL;
278 	}
279 	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
280 	if (err) {
281 		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
282 		return -EINVAL;
283 	}
284 	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
285 	if (err) {
286 		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
287 		return -EINVAL;
288 	}
289 	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
290 	if (err) {
291 		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
292 		return -EINVAL;
293 	}
294 
295 	if (verbose)
296 		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
297 	return 0;
298 }
299 static int sockmap_init_sockets(int verbose)
300 {
301 	int i, err, one = 1;
302 	struct sockaddr_in addr;
303 	int *fds[4] = {&s1, &s2, &c1, &c2};
304 
305 	s1 = s2 = p1 = p2 = c1 = c2 = 0;
306 
307 	/* Init sockets */
308 	for (i = 0; i < 4; i++) {
309 		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
310 		if (*fds[i] < 0) {
311 			perror("socket s1 failed()");
312 			return errno;
313 		}
314 	}
315 
316 	/* Allow reuse */
317 	for (i = 0; i < 2; i++) {
318 		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
319 				 (char *)&one, sizeof(one));
320 		if (err) {
321 			perror("setsockopt failed()");
322 			return errno;
323 		}
324 	}
325 
326 	/* Non-blocking sockets */
327 	for (i = 0; i < 2; i++) {
328 		err = ioctl(*fds[i], FIONBIO, (char *)&one);
329 		if (err < 0) {
330 			perror("ioctl s1 failed()");
331 			return errno;
332 		}
333 	}
334 
335 	/* Bind server sockets */
336 	memset(&addr, 0, sizeof(struct sockaddr_in));
337 	addr.sin_family = AF_INET;
338 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
339 
340 	addr.sin_port = htons(S1_PORT);
341 	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
342 	if (err < 0) {
343 		perror("bind s1 failed()");
344 		return errno;
345 	}
346 
347 	addr.sin_port = htons(S2_PORT);
348 	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
349 	if (err < 0) {
350 		perror("bind s2 failed()");
351 		return errno;
352 	}
353 
354 	/* Listen server sockets */
355 	addr.sin_port = htons(S1_PORT);
356 	err = listen(s1, 32);
357 	if (err < 0) {
358 		perror("listen s1 failed()");
359 		return errno;
360 	}
361 
362 	addr.sin_port = htons(S2_PORT);
363 	err = listen(s2, 32);
364 	if (err < 0) {
365 		perror("listen s1 failed()");
366 		return errno;
367 	}
368 
369 	/* Initiate Connect */
370 	addr.sin_port = htons(S1_PORT);
371 	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
372 	if (err < 0 && errno != EINPROGRESS) {
373 		perror("connect c1 failed()");
374 		return errno;
375 	}
376 
377 	addr.sin_port = htons(S2_PORT);
378 	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
379 	if (err < 0 && errno != EINPROGRESS) {
380 		perror("connect c2 failed()");
381 		return errno;
382 	} else if (err < 0) {
383 		err = 0;
384 	}
385 
386 	/* Accept Connecrtions */
387 	p1 = accept(s1, NULL, NULL);
388 	if (p1 < 0) {
389 		perror("accept s1 failed()");
390 		return errno;
391 	}
392 
393 	p2 = accept(s2, NULL, NULL);
394 	if (p2 < 0) {
395 		perror("accept s1 failed()");
396 		return errno;
397 	}
398 
399 	if (verbose > 1) {
400 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
401 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
402 			c1, s1, c2, s2);
403 	}
404 	return 0;
405 }
406 
/* Byte counters and monotonic timestamps collected by one transfer loop. */
struct msg_stats {
	size_t bytes_sent;
	size_t bytes_recvd;
	struct timespec start;	/* taken just before the loop begins */
	struct timespec end;	/* taken when the loop finishes */
};
413 
414 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
415 			     struct msg_stats *s,
416 			     struct sockmap_options *opt)
417 {
418 	bool drop = opt->drop_expected;
419 	unsigned char k = 0;
420 	FILE *file;
421 	int i, fp;
422 
423 	file = tmpfile();
424 	if (!file) {
425 		perror("create file for sendpage");
426 		return 1;
427 	}
428 	for (i = 0; i < iov_length * cnt; i++, k++)
429 		fwrite(&k, sizeof(char), 1, file);
430 	fflush(file);
431 	fseek(file, 0, SEEK_SET);
432 
433 	fp = fileno(file);
434 
435 	clock_gettime(CLOCK_MONOTONIC, &s->start);
436 	for (i = 0; i < cnt; i++) {
437 		int sent;
438 
439 		errno = 0;
440 		sent = sendfile(fd, fp, NULL, iov_length);
441 
442 		if (!drop && sent < 0) {
443 			perror("sendpage loop error");
444 			fclose(file);
445 			return sent;
446 		} else if (drop && sent >= 0) {
447 			printf("sendpage loop error expected: %i errno %i\n",
448 			       sent, errno);
449 			fclose(file);
450 			return -EIO;
451 		}
452 
453 		if (sent > 0)
454 			s->bytes_sent += sent;
455 	}
456 	clock_gettime(CLOCK_MONOTONIC, &s->end);
457 	fclose(file);
458 	return 0;
459 }
460 
/* Release every buffer referenced by @msg's iovec array, then the array
 * itself, and leave @msg describing no buffers. Safe on a zeroed msghdr.
 */
static void msg_free_iov(struct msghdr *msg)
{
	struct iovec *vec = msg->msg_iov;
	int idx = 0;

	while (idx < msg->msg_iovlen) {
		free(vec[idx].iov_base);
		idx++;
	}

	free(vec);
	msg->msg_iovlen = 0;
	msg->msg_iov = NULL;
}
471 
/* Allocate @iov_count zeroed buffers of @iov_length bytes each and attach
 * them to @msg.
 *
 * When both @data and @xmit are set the buffers are filled with one byte
 * pattern that counts 0,1,2,... (wrapping at 256) and continues across
 * buffer boundaries, which is what msg_verify_data() checks for.
 *
 * @msg is only modified on success. On allocation failure everything
 * allocated so far is released and -ENOMEM is returned. The previous
 * unwind code walked msg->msg_iov, which is not assigned until the success
 * path, and never freed the iovec array itself.
 *
 * Returns 0 on success. The caller releases the buffers with
 * msg_free_iov().
 */
static int msg_alloc_iov(struct msghdr *msg,
			 int iov_count, int iov_length,
			 bool data, bool xmit)
{
	unsigned char k = 0;
	struct iovec *iov;
	int i;

	iov = calloc(iov_count, sizeof(struct iovec));
	if (!iov)
		return -ENOMEM;

	for (i = 0; i < iov_count; i++) {
		unsigned char *d = calloc(iov_length, sizeof(char));

		if (!d) {
			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
			goto unwind_iov;
		}
		iov[i].iov_base = d;
		iov[i].iov_len = iov_length;

		if (data && xmit) {
			int j;

			for (j = 0; j < iov_length; j++)
				d[j] = k++;
		}
	}

	msg->msg_iov = iov;
	msg->msg_iovlen = iov_count;

	return 0;
unwind_iov:
	/* entries [0, i) were allocated; entry i is the one that failed */
	while (i-- > 0)
		free(iov[i].iov_base);
	free(iov);
	return -ENOMEM;
}
511 
512 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
513 {
514 	int i, j = 0, bytes_cnt = 0;
515 	unsigned char k = 0;
516 
517 	for (i = 0; i < msg->msg_iovlen; i++) {
518 		unsigned char *d = msg->msg_iov[i].iov_base;
519 
520 		/* Special case test for skb ingress + ktls */
521 		if (i == 0 && txmsg_ktls_skb) {
522 			if (msg->msg_iov[i].iov_len < 4)
523 				return -EIO;
524 			if (memcmp(d, "PASS", 4) != 0) {
525 				fprintf(stderr,
526 					"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
527 					i, 0, d[0], d[1], d[2], d[3]);
528 				return -EIO;
529 			}
530 			j = 4; /* advance index past PASS header */
531 		}
532 
533 		for (; j < msg->msg_iov[i].iov_len && size; j++) {
534 			if (d[j] != k++) {
535 				fprintf(stderr,
536 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
537 					i, j, d[j], k - 1, d[j+1], k);
538 				return -EIO;
539 			}
540 			bytes_cnt++;
541 			if (bytes_cnt == chunk_sz) {
542 				k = 0;
543 				bytes_cnt = 0;
544 			}
545 			size--;
546 		}
547 	}
548 	return 0;
549 }
550 
551 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
552 		    struct msg_stats *s, bool tx,
553 		    struct sockmap_options *opt)
554 {
555 	struct msghdr msg = {0}, msg_peek = {0};
556 	int err, i, flags = MSG_NOSIGNAL;
557 	bool drop = opt->drop_expected;
558 	bool data = opt->data_test;
559 
560 	err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
561 	if (err)
562 		goto out_errno;
563 	if (peek_flag) {
564 		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
565 		if (err)
566 			goto out_errno;
567 	}
568 
569 	if (tx) {
570 		clock_gettime(CLOCK_MONOTONIC, &s->start);
571 		for (i = 0; i < cnt; i++) {
572 			int sent;
573 
574 			errno = 0;
575 			sent = sendmsg(fd, &msg, flags);
576 
577 			if (!drop && sent < 0) {
578 				perror("sendmsg loop error");
579 				goto out_errno;
580 			} else if (drop && sent >= 0) {
581 				fprintf(stderr,
582 					"sendmsg loop error expected: %i errno %i\n",
583 					sent, errno);
584 				errno = -EIO;
585 				goto out_errno;
586 			}
587 			if (sent > 0)
588 				s->bytes_sent += sent;
589 		}
590 		clock_gettime(CLOCK_MONOTONIC, &s->end);
591 	} else {
592 		int slct, recvp = 0, recv, max_fd = fd;
593 		float total_bytes, txmsg_pop_total;
594 		int fd_flags = O_NONBLOCK;
595 		struct timeval timeout;
596 		fd_set w;
597 
598 		fcntl(fd, fd_flags);
599 		/* Account for pop bytes noting each iteration of apply will
600 		 * call msg_pop_data helper so we need to account for this
601 		 * by calculating the number of apply iterations. Note user
602 		 * of the tool can create cases where no data is sent by
603 		 * manipulating pop/push/pull/etc. For example txmsg_apply 1
604 		 * with txmsg_pop 1 will try to apply 1B at a time but each
605 		 * iteration will then pop 1B so no data will ever be sent.
606 		 * This is really only useful for testing edge cases in code
607 		 * paths.
608 		 */
609 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
610 		if (txmsg_apply)
611 			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
612 		else
613 			txmsg_pop_total = txmsg_pop * cnt;
614 		total_bytes -= txmsg_pop_total;
615 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
616 		if (err < 0)
617 			perror("recv start time");
618 		while (s->bytes_recvd < total_bytes) {
619 			if (txmsg_cork) {
620 				timeout.tv_sec = 0;
621 				timeout.tv_usec = 300000;
622 			} else {
623 				timeout.tv_sec = 3;
624 				timeout.tv_usec = 0;
625 			}
626 
627 			/* FD sets */
628 			FD_ZERO(&w);
629 			FD_SET(fd, &w);
630 
631 			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
632 			if (slct == -1) {
633 				perror("select()");
634 				clock_gettime(CLOCK_MONOTONIC, &s->end);
635 				goto out_errno;
636 			} else if (!slct) {
637 				if (opt->verbose)
638 					fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
639 				errno = -EIO;
640 				clock_gettime(CLOCK_MONOTONIC, &s->end);
641 				goto out_errno;
642 			}
643 
644 			errno = 0;
645 			if (peek_flag) {
646 				flags |= MSG_PEEK;
647 				recvp = recvmsg(fd, &msg_peek, flags);
648 				if (recvp < 0) {
649 					if (errno != EWOULDBLOCK) {
650 						clock_gettime(CLOCK_MONOTONIC, &s->end);
651 						goto out_errno;
652 					}
653 				}
654 				flags = 0;
655 			}
656 
657 			recv = recvmsg(fd, &msg, flags);
658 			if (recv < 0) {
659 				if (errno != EWOULDBLOCK) {
660 					clock_gettime(CLOCK_MONOTONIC, &s->end);
661 					perror("recv failed()");
662 					goto out_errno;
663 				}
664 			}
665 
666 			s->bytes_recvd += recv;
667 
668 			if (data) {
669 				int chunk_sz = opt->sendpage ?
670 						iov_length * cnt :
671 						iov_length * iov_count;
672 
673 				errno = msg_verify_data(&msg, recv, chunk_sz);
674 				if (errno) {
675 					perror("data verify msg failed");
676 					goto out_errno;
677 				}
678 				if (recvp) {
679 					errno = msg_verify_data(&msg_peek,
680 								recvp,
681 								chunk_sz);
682 					if (errno) {
683 						perror("data verify msg_peek failed");
684 						goto out_errno;
685 					}
686 				}
687 			}
688 		}
689 		clock_gettime(CLOCK_MONOTONIC, &s->end);
690 	}
691 
692 	msg_free_iov(&msg);
693 	msg_free_iov(&msg_peek);
694 	return err;
695 out_errno:
696 	msg_free_iov(&msg);
697 	msg_free_iov(&msg_peek);
698 	return errno;
699 }
700 
701 static float giga = 1000000000;
702 
703 static inline float sentBps(struct msg_stats s)
704 {
705 	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
706 }
707 
708 static inline float recvdBps(struct msg_stats s)
709 {
710 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
711 }
712 
713 static int sendmsg_test(struct sockmap_options *opt)
714 {
715 	float sent_Bps = 0, recvd_Bps = 0;
716 	int rx_fd, txpid, rxpid, err = 0;
717 	struct msg_stats s = {0};
718 	int iov_count = opt->iov_count;
719 	int iov_buf = opt->iov_length;
720 	int rx_status, tx_status;
721 	int cnt = opt->rate;
722 
723 	errno = 0;
724 
725 	if (opt->base)
726 		rx_fd = p1;
727 	else
728 		rx_fd = p2;
729 
730 	if (ktls) {
731 		/* Redirecting into non-TLS socket which sends into a TLS
732 		 * socket is not a valid test. So in this case lets not
733 		 * enable kTLS but still run the test.
734 		 */
735 		if (!txmsg_redir || txmsg_ingress) {
736 			err = sockmap_init_ktls(opt->verbose, rx_fd);
737 			if (err)
738 				return err;
739 		}
740 		err = sockmap_init_ktls(opt->verbose, c1);
741 		if (err)
742 			return err;
743 	}
744 
745 	rxpid = fork();
746 	if (rxpid == 0) {
747 		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
748 		if (opt->drop_expected || txmsg_ktls_skb_drop)
749 			_exit(0);
750 
751 		if (!iov_buf) /* zero bytes sent case */
752 			_exit(0);
753 
754 		if (opt->sendpage)
755 			iov_count = 1;
756 		err = msg_loop(rx_fd, iov_count, iov_buf,
757 			       cnt, &s, false, opt);
758 		if (opt->verbose > 1)
759 			fprintf(stderr,
760 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
761 				iov_count, iov_buf, cnt, err);
762 		if (s.end.tv_sec - s.start.tv_sec) {
763 			sent_Bps = sentBps(s);
764 			recvd_Bps = recvdBps(s);
765 		}
766 		if (opt->verbose > 1)
767 			fprintf(stdout,
768 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
769 				s.bytes_sent, sent_Bps, sent_Bps/giga,
770 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
771 				peek_flag ? "(peek_msg)" : "");
772 		if (err && txmsg_cork)
773 			err = 0;
774 		exit(err ? 1 : 0);
775 	} else if (rxpid == -1) {
776 		perror("msg_loop_rx");
777 		return errno;
778 	}
779 
780 	txpid = fork();
781 	if (txpid == 0) {
782 		if (opt->sendpage)
783 			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
784 		else
785 			err = msg_loop(c1, iov_count, iov_buf,
786 				       cnt, &s, true, opt);
787 
788 		if (err)
789 			fprintf(stderr,
790 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
791 				iov_count, iov_buf, cnt, err);
792 		if (s.end.tv_sec - s.start.tv_sec) {
793 			sent_Bps = sentBps(s);
794 			recvd_Bps = recvdBps(s);
795 		}
796 		if (opt->verbose > 1)
797 			fprintf(stdout,
798 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
799 				s.bytes_sent, sent_Bps, sent_Bps/giga,
800 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
801 		exit(err ? 1 : 0);
802 	} else if (txpid == -1) {
803 		perror("msg_loop_tx");
804 		return errno;
805 	}
806 
807 	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
808 	assert(waitpid(txpid, &tx_status, 0) == txpid);
809 	if (WIFEXITED(rx_status)) {
810 		err = WEXITSTATUS(rx_status);
811 		if (err) {
812 			fprintf(stderr, "rx thread exited with err %d.\n", err);
813 			goto out;
814 		}
815 	}
816 	if (WIFEXITED(tx_status)) {
817 		err = WEXITSTATUS(tx_status);
818 		if (err)
819 			fprintf(stderr, "tx thread exited with err %d.\n", err);
820 	}
821 out:
822 	return err;
823 }
824 
825 static int forever_ping_pong(int rate, struct sockmap_options *opt)
826 {
827 	struct timeval timeout;
828 	char buf[1024] = {0};
829 	int sc;
830 
831 	timeout.tv_sec = 10;
832 	timeout.tv_usec = 0;
833 
834 	/* Ping/Pong data from client to server */
835 	sc = send(c1, buf, sizeof(buf), 0);
836 	if (sc < 0) {
837 		perror("send failed()");
838 		return sc;
839 	}
840 
841 	do {
842 		int s, rc, i, max_fd = p2;
843 		fd_set w;
844 
845 		/* FD sets */
846 		FD_ZERO(&w);
847 		FD_SET(c1, &w);
848 		FD_SET(c2, &w);
849 		FD_SET(p1, &w);
850 		FD_SET(p2, &w);
851 
852 		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
853 		if (s == -1) {
854 			perror("select()");
855 			break;
856 		} else if (!s) {
857 			fprintf(stderr, "unexpected timeout\n");
858 			break;
859 		}
860 
861 		for (i = 0; i <= max_fd && s > 0; ++i) {
862 			if (!FD_ISSET(i, &w))
863 				continue;
864 
865 			s--;
866 
867 			rc = recv(i, buf, sizeof(buf), 0);
868 			if (rc < 0) {
869 				if (errno != EWOULDBLOCK) {
870 					perror("recv failed()");
871 					return rc;
872 				}
873 			}
874 
875 			if (rc == 0) {
876 				close(i);
877 				break;
878 			}
879 
880 			sc = send(i, buf, rc, 0);
881 			if (sc < 0) {
882 				perror("send failed()");
883 				return sc;
884 			}
885 		}
886 
887 		if (rate)
888 			sleep(rate);
889 
890 		if (opt->verbose) {
891 			printf(".");
892 			fflush(stdout);
893 
894 		}
895 	} while (running);
896 
897 	return 0;
898 }
899 
/* Test kinds understood by run_options(). */
enum {
	SELFTESTS     = 0,
	PING_PONG     = 1,
	SENDMSG       = 2,
	BASE          = 3,
	BASE_SENDPAGE = 4,
	SENDPAGE      = 5,
};
908 
909 static int run_options(struct sockmap_options *options, int cg_fd,  int test)
910 {
911 	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
912 
913 	/* If base test skip BPF setup */
914 	if (test == BASE || test == BASE_SENDPAGE)
915 		goto run;
916 
917 	/* Attach programs to sockmap */
918 	if (!txmsg_omit_skb_parser) {
919 		err = bpf_prog_attach(prog_fd[0], map_fd[0],
920 				      BPF_SK_SKB_STREAM_PARSER, 0);
921 		if (err) {
922 			fprintf(stderr,
923 				"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
924 				prog_fd[0], map_fd[0], err, strerror(errno));
925 			return err;
926 		}
927 	}
928 
929 	err = bpf_prog_attach(prog_fd[1], map_fd[0],
930 				BPF_SK_SKB_STREAM_VERDICT, 0);
931 	if (err) {
932 		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
933 			err, strerror(errno));
934 		return err;
935 	}
936 
937 	/* Attach programs to TLS sockmap */
938 	if (txmsg_ktls_skb) {
939 		if (!txmsg_omit_skb_parser) {
940 			err = bpf_prog_attach(prog_fd[0], map_fd[8],
941 					      BPF_SK_SKB_STREAM_PARSER, 0);
942 			if (err) {
943 				fprintf(stderr,
944 					"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
945 					prog_fd[0], map_fd[8], err, strerror(errno));
946 				return err;
947 			}
948 		}
949 
950 		err = bpf_prog_attach(prog_fd[2], map_fd[8],
951 				      BPF_SK_SKB_STREAM_VERDICT, 0);
952 		if (err) {
953 			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
954 				err, strerror(errno));
955 			return err;
956 		}
957 	}
958 
959 	/* Attach to cgroups */
960 	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
961 	if (err) {
962 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
963 			err, strerror(errno));
964 		return err;
965 	}
966 
967 run:
968 	err = sockmap_init_sockets(options->verbose);
969 	if (err) {
970 		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
971 		goto out;
972 	}
973 
974 	/* Attach txmsg program to sockmap */
975 	if (txmsg_pass)
976 		tx_prog_fd = prog_fd[4];
977 	else if (txmsg_redir)
978 		tx_prog_fd = prog_fd[5];
979 	else if (txmsg_apply)
980 		tx_prog_fd = prog_fd[6];
981 	else if (txmsg_cork)
982 		tx_prog_fd = prog_fd[7];
983 	else if (txmsg_drop)
984 		tx_prog_fd = prog_fd[8];
985 	else
986 		tx_prog_fd = 0;
987 
988 	if (tx_prog_fd) {
989 		int redir_fd, i = 0;
990 
991 		err = bpf_prog_attach(tx_prog_fd,
992 				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
993 		if (err) {
994 			fprintf(stderr,
995 				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
996 				err, strerror(errno));
997 			goto out;
998 		}
999 
1000 		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
1001 		if (err) {
1002 			fprintf(stderr,
1003 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1004 				err, strerror(errno));
1005 			goto out;
1006 		}
1007 
1008 		if (txmsg_redir)
1009 			redir_fd = c2;
1010 		else
1011 			redir_fd = c1;
1012 
1013 		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
1014 		if (err) {
1015 			fprintf(stderr,
1016 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1017 				err, strerror(errno));
1018 			goto out;
1019 		}
1020 
1021 		if (txmsg_apply) {
1022 			err = bpf_map_update_elem(map_fd[3],
1023 						  &i, &txmsg_apply, BPF_ANY);
1024 			if (err) {
1025 				fprintf(stderr,
1026 					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
1027 					err, strerror(errno));
1028 				goto out;
1029 			}
1030 		}
1031 
1032 		if (txmsg_cork) {
1033 			err = bpf_map_update_elem(map_fd[4],
1034 						  &i, &txmsg_cork, BPF_ANY);
1035 			if (err) {
1036 				fprintf(stderr,
1037 					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
1038 					err, strerror(errno));
1039 				goto out;
1040 			}
1041 		}
1042 
1043 		if (txmsg_start) {
1044 			err = bpf_map_update_elem(map_fd[5],
1045 						  &i, &txmsg_start, BPF_ANY);
1046 			if (err) {
1047 				fprintf(stderr,
1048 					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
1049 					err, strerror(errno));
1050 				goto out;
1051 			}
1052 		}
1053 
1054 		if (txmsg_end) {
1055 			i = 1;
1056 			err = bpf_map_update_elem(map_fd[5],
1057 						  &i, &txmsg_end, BPF_ANY);
1058 			if (err) {
1059 				fprintf(stderr,
1060 					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
1061 					err, strerror(errno));
1062 				goto out;
1063 			}
1064 		}
1065 
1066 		if (txmsg_start_push) {
1067 			i = 2;
1068 			err = bpf_map_update_elem(map_fd[5],
1069 						  &i, &txmsg_start_push, BPF_ANY);
1070 			if (err) {
1071 				fprintf(stderr,
1072 					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
1073 					err, strerror(errno));
1074 				goto out;
1075 			}
1076 		}
1077 
1078 		if (txmsg_end_push) {
1079 			i = 3;
1080 			err = bpf_map_update_elem(map_fd[5],
1081 						  &i, &txmsg_end_push, BPF_ANY);
1082 			if (err) {
1083 				fprintf(stderr,
1084 					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
1085 					txmsg_end_push, i, err, strerror(errno));
1086 				goto out;
1087 			}
1088 		}
1089 
1090 		if (txmsg_start_pop) {
1091 			i = 4;
1092 			err = bpf_map_update_elem(map_fd[5],
1093 						  &i, &txmsg_start_pop, BPF_ANY);
1094 			if (err) {
1095 				fprintf(stderr,
1096 					"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop):  %d (%s)\n",
1097 					txmsg_start_pop, i, err, strerror(errno));
1098 				goto out;
1099 			}
1100 		} else {
1101 			i = 4;
1102 			bpf_map_update_elem(map_fd[5],
1103 						  &i, &txmsg_start_pop, BPF_ANY);
1104 		}
1105 
1106 		if (txmsg_pop) {
1107 			i = 5;
1108 			err = bpf_map_update_elem(map_fd[5],
1109 						  &i, &txmsg_pop, BPF_ANY);
1110 			if (err) {
1111 				fprintf(stderr,
1112 					"ERROR: bpf_map_update_elem %i@%i (txmsg_pop):  %d (%s)\n",
1113 					txmsg_pop, i, err, strerror(errno));
1114 				goto out;
1115 			}
1116 		} else {
1117 			i = 5;
1118 			bpf_map_update_elem(map_fd[5],
1119 					    &i, &txmsg_pop, BPF_ANY);
1120 
1121 		}
1122 
1123 		if (txmsg_ingress) {
1124 			int in = BPF_F_INGRESS;
1125 
1126 			i = 0;
1127 			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
1128 			if (err) {
1129 				fprintf(stderr,
1130 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1131 					err, strerror(errno));
1132 			}
1133 			i = 1;
1134 			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
1135 			if (err) {
1136 				fprintf(stderr,
1137 					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
1138 					err, strerror(errno));
1139 			}
1140 			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
1141 			if (err) {
1142 				fprintf(stderr,
1143 					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
1144 					err, strerror(errno));
1145 			}
1146 
1147 			i = 2;
1148 			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
1149 			if (err) {
1150 				fprintf(stderr,
1151 					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
1152 					err, strerror(errno));
1153 			}
1154 		}
1155 
1156 		if (txmsg_ktls_skb) {
1157 			int ingress = BPF_F_INGRESS;
1158 
1159 			i = 0;
1160 			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
1161 			if (err) {
1162 				fprintf(stderr,
1163 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1164 					err, strerror(errno));
1165 			}
1166 
1167 			if (txmsg_ktls_skb_redir) {
1168 				i = 1;
1169 				err = bpf_map_update_elem(map_fd[7],
1170 							  &i, &ingress, BPF_ANY);
1171 				if (err) {
1172 					fprintf(stderr,
1173 						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1174 						err, strerror(errno));
1175 				}
1176 			}
1177 
1178 			if (txmsg_ktls_skb_drop) {
1179 				i = 1;
1180 				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
1181 			}
1182 		}
1183 
1184 		if (txmsg_redir_skb) {
1185 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
1186 					p2 : p1;
1187 			int ingress = BPF_F_INGRESS;
1188 
1189 			i = 0;
1190 			err = bpf_map_update_elem(map_fd[7],
1191 						  &i, &ingress, BPF_ANY);
1192 			if (err) {
1193 				fprintf(stderr,
1194 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1195 					err, strerror(errno));
1196 			}
1197 
1198 			i = 3;
1199 			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
1200 			if (err) {
1201 				fprintf(stderr,
1202 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1203 					err, strerror(errno));
1204 			}
1205 		}
1206 	}
1207 
1208 	if (skb_use_parser) {
1209 		i = 2;
1210 		err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
1211 	}
1212 
1213 	if (txmsg_drop)
1214 		options->drop_expected = true;
1215 
1216 	if (test == PING_PONG)
1217 		err = forever_ping_pong(options->rate, options);
1218 	else if (test == SENDMSG) {
1219 		options->base = false;
1220 		options->sendpage = false;
1221 		err = sendmsg_test(options);
1222 	} else if (test == SENDPAGE) {
1223 		options->base = false;
1224 		options->sendpage = true;
1225 		err = sendmsg_test(options);
1226 	} else if (test == BASE) {
1227 		options->base = true;
1228 		options->sendpage = false;
1229 		err = sendmsg_test(options);
1230 	} else if (test == BASE_SENDPAGE) {
1231 		options->base = true;
1232 		options->sendpage = true;
1233 		err = sendmsg_test(options);
1234 	} else
1235 		fprintf(stderr, "unknown test\n");
1236 out:
	/* Detach and zero all the maps */
1238 	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
1239 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
1240 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
1241 	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
1242 	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
1243 
1244 	if (tx_prog_fd >= 0)
1245 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
1246 
1247 	for (i = 0; i < 8; i++) {
1248 		key = next_key = 0;
1249 		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1250 		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
1251 			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1252 			key = next_key;
1253 		}
1254 	}
1255 
1256 	close(s1);
1257 	close(s2);
1258 	close(p1);
1259 	close(p2);
1260 	close(c1);
1261 	close(c2);
1262 	return err;
1263 }
1264 
1265 static char *test_to_str(int test)
1266 {
1267 	switch (test) {
1268 	case SENDMSG:
1269 		return "sendmsg";
1270 	case SENDPAGE:
1271 		return "sendpage";
1272 	}
1273 	return "unknown";
1274 }
1275 
/* Append @src to the NUL-terminated string in @dst without writing past
 * @dst_cap bytes in total; text that does not fit is truncated.
 */
static void append_str(char *dst, const char *src, size_t dst_cap)
{
	size_t used = strlen(dst);
	size_t room = dst_cap - used;

	/* With one byte (or none) left only the terminator fits, so skip.
	 * Otherwise keep one byte back: strncat() writes the terminating
	 * zero in addition to the characters it copies.
	 */
	if (room > 1)
		strncat(dst, src, room - 1);
}
1285 
1286 #define OPTSTRING 60
1287 static void test_options(char *options)
1288 {
1289 	char tstr[OPTSTRING];
1290 
1291 	memset(options, 0, OPTSTRING);
1292 
1293 	if (txmsg_pass)
1294 		append_str(options, "pass,", OPTSTRING);
1295 	if (txmsg_redir)
1296 		append_str(options, "redir,", OPTSTRING);
1297 	if (txmsg_drop)
1298 		append_str(options, "drop,", OPTSTRING);
1299 	if (txmsg_apply) {
1300 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
1301 		append_str(options, tstr, OPTSTRING);
1302 	}
1303 	if (txmsg_cork) {
1304 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
1305 		append_str(options, tstr, OPTSTRING);
1306 	}
1307 	if (txmsg_start) {
1308 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
1309 		append_str(options, tstr, OPTSTRING);
1310 	}
1311 	if (txmsg_end) {
1312 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
1313 		append_str(options, tstr, OPTSTRING);
1314 	}
1315 	if (txmsg_start_pop) {
1316 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
1317 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
1318 		append_str(options, tstr, OPTSTRING);
1319 	}
1320 	if (txmsg_ingress)
1321 		append_str(options, "ingress,", OPTSTRING);
1322 	if (txmsg_redir_skb)
1323 		append_str(options, "redir_skb,", OPTSTRING);
1324 	if (txmsg_ktls_skb)
1325 		append_str(options, "ktls_skb,", OPTSTRING);
1326 	if (ktls)
1327 		append_str(options, "ktls,", OPTSTRING);
1328 	if (peek_flag)
1329 		append_str(options, "peek,", OPTSTRING);
1330 }
1331 
1332 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
1333 {
1334 	char *options = calloc(OPTSTRING, sizeof(char));
1335 	int err;
1336 
1337 	if (test == SENDPAGE)
1338 		opt->sendpage = true;
1339 	else
1340 		opt->sendpage = false;
1341 
1342 	if (txmsg_drop)
1343 		opt->drop_expected = true;
1344 	else
1345 		opt->drop_expected = false;
1346 
1347 	test_options(options);
1348 
1349 	if (opt->verbose) {
1350 		fprintf(stdout,
1351 			" [TEST %i]: (%i, %i, %i, %s, %s): ",
1352 			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
1353 			test_to_str(test), options);
1354 		fflush(stdout);
1355 	}
1356 	err = run_options(opt, cgrp, test);
1357 	if (opt->verbose)
1358 		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
1359 	test_cnt++;
1360 	!err ? passed++ : failed++;
1361 	free(options);
1362 	return err;
1363 }
1364 
1365 static void test_exec(int cgrp, struct sockmap_options *opt)
1366 {
1367 	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
1368 	int err;
1369 
1370 	if (type == 0) {
1371 		test_start();
1372 		err = __test_exec(cgrp, SENDMSG, opt);
1373 		if (err)
1374 			test_fail();
1375 	} else {
1376 		test_start();
1377 		err = __test_exec(cgrp, SENDPAGE, opt);
1378 		if (err)
1379 			test_fail();
1380 	}
1381 }
1382 
1383 static void test_send_one(struct sockmap_options *opt, int cgrp)
1384 {
1385 	opt->iov_length = 1;
1386 	opt->iov_count = 1;
1387 	opt->rate = 1;
1388 	test_exec(cgrp, opt);
1389 
1390 	opt->iov_length = 1;
1391 	opt->iov_count = 1024;
1392 	opt->rate = 1;
1393 	test_exec(cgrp, opt);
1394 
1395 	opt->iov_length = 1024;
1396 	opt->iov_count = 1;
1397 	opt->rate = 1;
1398 	test_exec(cgrp, opt);
1399 
1400 }
1401 
1402 static void test_send_many(struct sockmap_options *opt, int cgrp)
1403 {
1404 	opt->iov_length = 3;
1405 	opt->iov_count = 1;
1406 	opt->rate = 512;
1407 	test_exec(cgrp, opt);
1408 
1409 	opt->rate = 100;
1410 	opt->iov_count = 1;
1411 	opt->iov_length = 5;
1412 	test_exec(cgrp, opt);
1413 }
1414 
1415 static void test_send_large(struct sockmap_options *opt, int cgrp)
1416 {
1417 	opt->iov_length = 256;
1418 	opt->iov_count = 1024;
1419 	opt->rate = 2;
1420 	test_exec(cgrp, opt);
1421 }
1422 
/* Run the complete size matrix (one, many, large) for the current
 * settings, then yield the CPU.
 */
static void test_send(struct sockmap_options *opt, int cgrp)
{
	test_send_one(opt, cgrp);	/* single-message layouts */
	test_send_many(opt, cgrp);	/* small messages, high rate */
	test_send_large(opt, cgrp);	/* many iovs per message */

	sched_yield();
}
1430 
1431 static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
1432 {
1433 	/* Test small and large iov_count values with pass/redir/apply/cork */
1434 	txmsg_pass = 1;
1435 	test_send(opt, cgrp);
1436 }
1437 
1438 static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
1439 {
1440 	txmsg_redir = 1;
1441 	test_send(opt, cgrp);
1442 }
1443 
1444 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
1445 {
1446 	txmsg_drop = 1;
1447 	test_send(opt, cgrp);
1448 }
1449 
1450 static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
1451 {
1452 	txmsg_pass = txmsg_drop = 0;
1453 	txmsg_ingress = txmsg_redir = 1;
1454 	test_send(opt, cgrp);
1455 }
1456 
1457 static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
1458 {
1459 	bool data = opt->data_test;
1460 	int k = ktls;
1461 
1462 	opt->data_test = true;
1463 	ktls = 1;
1464 
1465 	txmsg_pass = txmsg_drop = 0;
1466 	txmsg_ingress = txmsg_redir = 0;
1467 	txmsg_ktls_skb = 1;
1468 	txmsg_pass = 1;
1469 
1470 	/* Using data verification so ensure iov layout is
1471 	 * expected from test receiver side. e.g. has enough
1472 	 * bytes to write test code.
1473 	 */
1474 	opt->iov_length = 100;
1475 	opt->iov_count = 1;
1476 	opt->rate = 1;
1477 	test_exec(cgrp, opt);
1478 
1479 	txmsg_ktls_skb_drop = 1;
1480 	test_exec(cgrp, opt);
1481 
1482 	txmsg_ktls_skb_drop = 0;
1483 	txmsg_ktls_skb_redir = 1;
1484 	test_exec(cgrp, opt);
1485 	txmsg_ktls_skb_redir = 0;
1486 
1487 	/* Tests that omit skb_parser */
1488 	txmsg_omit_skb_parser = 1;
1489 	ktls = 0;
1490 	txmsg_ktls_skb = 0;
1491 	test_exec(cgrp, opt);
1492 
1493 	txmsg_ktls_skb_drop = 1;
1494 	test_exec(cgrp, opt);
1495 	txmsg_ktls_skb_drop = 0;
1496 
1497 	txmsg_ktls_skb_redir = 1;
1498 	test_exec(cgrp, opt);
1499 
1500 	ktls = 1;
1501 	test_exec(cgrp, opt);
1502 	txmsg_omit_skb_parser = 0;
1503 
1504 	opt->data_test = data;
1505 	ktls = k;
1506 }
1507 
1508 /* Test cork with hung data. This tests poor usage patterns where
1509  * cork can leave data on the ring if user program is buggy and
1510  * doesn't flush them somehow. They do take some time however
1511  * because they wait for a timeout. Test pass, redir and cork with
1512  * apply logic. Use cork size of 4097 with send_large to avoid
1513  * aligning cork size with send size.
1514  */
1515 static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
1516 {
1517 	txmsg_pass = 1;
1518 	txmsg_redir = 0;
1519 	txmsg_cork = 4097;
1520 	txmsg_apply = 4097;
1521 	test_send_large(opt, cgrp);
1522 
1523 	txmsg_pass = 0;
1524 	txmsg_redir = 1;
1525 	txmsg_apply = 0;
1526 	txmsg_cork = 4097;
1527 	test_send_large(opt, cgrp);
1528 
1529 	txmsg_pass = 0;
1530 	txmsg_redir = 1;
1531 	txmsg_apply = 4097;
1532 	txmsg_cork = 4097;
1533 	test_send_large(opt, cgrp);
1534 }
1535 
1536 static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
1537 {
1538 	/* Test basic start/end */
1539 	txmsg_start = 1;
1540 	txmsg_end = 2;
1541 	test_send(opt, cgrp);
1542 
1543 	/* Test >4k pull */
1544 	txmsg_start = 4096;
1545 	txmsg_end = 9182;
1546 	test_send_large(opt, cgrp);
1547 
1548 	/* Test pull + redirect */
1549 	txmsg_redir = 0;
1550 	txmsg_start = 1;
1551 	txmsg_end = 2;
1552 	test_send(opt, cgrp);
1553 
1554 	/* Test pull + cork */
1555 	txmsg_redir = 0;
1556 	txmsg_cork = 512;
1557 	txmsg_start = 1;
1558 	txmsg_end = 2;
1559 	test_send_many(opt, cgrp);
1560 
1561 	/* Test pull + cork + redirect */
1562 	txmsg_redir = 1;
1563 	txmsg_cork = 512;
1564 	txmsg_start = 1;
1565 	txmsg_end = 2;
1566 	test_send_many(opt, cgrp);
1567 }
1568 
1569 static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
1570 {
1571 	/* Test basic pop */
1572 	txmsg_start_pop = 1;
1573 	txmsg_pop = 2;
1574 	test_send_many(opt, cgrp);
1575 
1576 	/* Test pop with >4k */
1577 	txmsg_start_pop = 4096;
1578 	txmsg_pop = 4096;
1579 	test_send_large(opt, cgrp);
1580 
1581 	/* Test pop + redirect */
1582 	txmsg_redir = 1;
1583 	txmsg_start_pop = 1;
1584 	txmsg_pop = 2;
1585 	test_send_many(opt, cgrp);
1586 
1587 	/* Test pop + cork */
1588 	txmsg_redir = 0;
1589 	txmsg_cork = 512;
1590 	txmsg_start_pop = 1;
1591 	txmsg_pop = 2;
1592 	test_send_many(opt, cgrp);
1593 
1594 	/* Test pop + redirect + cork */
1595 	txmsg_redir = 1;
1596 	txmsg_cork = 4;
1597 	txmsg_start_pop = 1;
1598 	txmsg_pop = 2;
1599 	test_send_many(opt, cgrp);
1600 }
1601 
1602 static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
1603 {
1604 	/* Test basic push */
1605 	txmsg_start_push = 1;
1606 	txmsg_end_push = 1;
1607 	test_send(opt, cgrp);
1608 
1609 	/* Test push 4kB >4k */
1610 	txmsg_start_push = 4096;
1611 	txmsg_end_push = 4096;
1612 	test_send_large(opt, cgrp);
1613 
1614 	/* Test push + redirect */
1615 	txmsg_redir = 1;
1616 	txmsg_start_push = 1;
1617 	txmsg_end_push = 2;
1618 	test_send_many(opt, cgrp);
1619 
1620 	/* Test push + cork */
1621 	txmsg_redir = 0;
1622 	txmsg_cork = 512;
1623 	txmsg_start_push = 1;
1624 	txmsg_end_push = 2;
1625 	test_send_many(opt, cgrp);
1626 }
1627 
1628 static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
1629 {
1630 	txmsg_start_push = 1;
1631 	txmsg_end_push = 10;
1632 	txmsg_start_pop = 5;
1633 	txmsg_pop = 4;
1634 	test_send_large(opt, cgrp);
1635 }
1636 
1637 static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
1638 {
1639 	txmsg_pass = 1;
1640 	txmsg_redir = 0;
1641 	txmsg_apply = 1;
1642 	txmsg_cork = 0;
1643 	test_send_one(opt, cgrp);
1644 
1645 	txmsg_pass = 0;
1646 	txmsg_redir = 1;
1647 	txmsg_apply = 1;
1648 	txmsg_cork = 0;
1649 	test_send_one(opt, cgrp);
1650 
1651 	txmsg_pass = 1;
1652 	txmsg_redir = 0;
1653 	txmsg_apply = 1024;
1654 	txmsg_cork = 0;
1655 	test_send_large(opt, cgrp);
1656 
1657 	txmsg_pass = 0;
1658 	txmsg_redir = 1;
1659 	txmsg_apply = 1024;
1660 	txmsg_cork = 0;
1661 	test_send_large(opt, cgrp);
1662 }
1663 
1664 static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
1665 {
1666 	txmsg_pass = 1;
1667 	txmsg_redir = 0;
1668 	txmsg_apply = 0;
1669 	txmsg_cork = 1;
1670 	test_send(opt, cgrp);
1671 
1672 	txmsg_pass = 1;
1673 	txmsg_redir = 0;
1674 	txmsg_apply = 1;
1675 	txmsg_cork = 1;
1676 	test_send(opt, cgrp);
1677 }
1678 
1679 static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
1680 {
1681 	txmsg_pass = 1;
1682 	skb_use_parser = 512;
1683 	opt->iov_length = 256;
1684 	opt->iov_count = 1;
1685 	opt->rate = 2;
1686 	test_exec(cgrp, opt);
1687 }
1688 
/* Names of the BPF maps looked up by populate_progs(). The index of a
 * name here is the index of the matching entry in maps[] and map_fd[].
 */
char *map_names[] = {
	[0] = "sock_map",
	[1] = "sock_map_txmsg",
	[2] = "sock_map_redir",
	[3] = "sock_apply_bytes",
	[4] = "sock_cork_bytes",
	[5] = "sock_bytes",
	[6] = "sock_redir_flags",
	[7] = "sock_skb_opts",
	[8] = "tls_sock_map",
};
1700 
1701 int prog_attach_type[] = {
1702 	BPF_SK_SKB_STREAM_PARSER,
1703 	BPF_SK_SKB_STREAM_VERDICT,
1704 	BPF_SK_SKB_STREAM_VERDICT,
1705 	BPF_CGROUP_SOCK_OPS,
1706 	BPF_SK_MSG_VERDICT,
1707 	BPF_SK_MSG_VERDICT,
1708 	BPF_SK_MSG_VERDICT,
1709 	BPF_SK_MSG_VERDICT,
1710 	BPF_SK_MSG_VERDICT,
1711 	BPF_SK_MSG_VERDICT,
1712 	BPF_SK_MSG_VERDICT,
1713 };
1714 
1715 int prog_type[] = {
1716 	BPF_PROG_TYPE_SK_SKB,
1717 	BPF_PROG_TYPE_SK_SKB,
1718 	BPF_PROG_TYPE_SK_SKB,
1719 	BPF_PROG_TYPE_SOCK_OPS,
1720 	BPF_PROG_TYPE_SK_MSG,
1721 	BPF_PROG_TYPE_SK_MSG,
1722 	BPF_PROG_TYPE_SK_MSG,
1723 	BPF_PROG_TYPE_SK_MSG,
1724 	BPF_PROG_TYPE_SK_MSG,
1725 	BPF_PROG_TYPE_SK_MSG,
1726 	BPF_PROG_TYPE_SK_MSG,
1727 };
1728 
1729 static int populate_progs(char *bpf_file)
1730 {
1731 	struct bpf_program *prog;
1732 	struct bpf_object *obj;
1733 	int i = 0;
1734 	long err;
1735 
1736 	obj = bpf_object__open(bpf_file);
1737 	err = libbpf_get_error(obj);
1738 	if (err) {
1739 		char err_buf[256];
1740 
1741 		libbpf_strerror(err, err_buf, sizeof(err_buf));
1742 		printf("Unable to load eBPF objects in file '%s' : %s\n",
1743 		       bpf_file, err_buf);
1744 		return -1;
1745 	}
1746 
1747 	bpf_object__for_each_program(prog, obj) {
1748 		bpf_program__set_type(prog, prog_type[i]);
1749 		bpf_program__set_expected_attach_type(prog,
1750 						      prog_attach_type[i]);
1751 		i++;
1752 	}
1753 
1754 	i = bpf_object__load(obj);
1755 	i = 0;
1756 	bpf_object__for_each_program(prog, obj) {
1757 		prog_fd[i] = bpf_program__fd(prog);
1758 		i++;
1759 	}
1760 
1761 	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
1762 		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1763 		map_fd[i] = bpf_map__fd(maps[i]);
1764 		if (map_fd[i] < 0) {
1765 			fprintf(stderr, "load_bpf_file: (%i) %s\n",
1766 				map_fd[i], strerror(errno));
1767 			return -1;
1768 		}
1769 	}
1770 
1771 	return 0;
1772 }
1773 
1774 struct _test test[] = {
1775 	{"txmsg test passthrough", test_txmsg_pass},
1776 	{"txmsg test redirect", test_txmsg_redir},
1777 	{"txmsg test drop", test_txmsg_drop},
1778 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
1779 	{"txmsg test skb", test_txmsg_skb},
1780 	{"txmsg test apply", test_txmsg_apply},
1781 	{"txmsg test cork", test_txmsg_cork},
1782 	{"txmsg test hanging corks", test_txmsg_cork_hangs},
1783 	{"txmsg test push_data", test_txmsg_push},
1784 	{"txmsg test pull-data", test_txmsg_pull},
1785 	{"txmsg test pop-data", test_txmsg_pop},
1786 	{"txmsg test push/pop data", test_txmsg_push_pop},
1787 	{"txmsg text ingress parser", test_txmsg_ingress_parser},
1788 };
1789 
1790 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
1791 {
1792 	char *entry, *ptr;
1793 
1794 	if (!opt->whitelist)
1795 		return 0;
1796 	ptr = strdup(opt->whitelist);
1797 	if (!ptr)
1798 		return -ENOMEM;
1799 	entry = strtok(ptr, ",");
1800 	while (entry) {
1801 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1802 		    strstr(opt->map, entry) != 0 ||
1803 		    strstr(t->title, entry) != 0)
1804 			return 0;
1805 		entry = strtok(NULL, ",");
1806 	}
1807 	return -EINVAL;
1808 }
1809 
1810 static int check_blacklist(struct _test *t, struct sockmap_options *opt)
1811 {
1812 	char *entry, *ptr;
1813 
1814 	if (!opt->blacklist)
1815 		return -EINVAL;
1816 	ptr = strdup(opt->blacklist);
1817 	if (!ptr)
1818 		return -ENOMEM;
1819 	entry = strtok(ptr, ",");
1820 	while (entry) {
1821 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1822 		    strstr(opt->map, entry) != 0 ||
1823 		    strstr(t->title, entry) != 0)
1824 			return 0;
1825 		entry = strtok(NULL, ",");
1826 	}
1827 	return -EINVAL;
1828 }
1829 
1830 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
1831 {
1832 	int i, err;
1833 
1834 	err = populate_progs(opt->map);
1835 	if (err < 0) {
1836 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1837 		return err;
1838 	}
1839 
1840 	/* Tests basic commands and APIs */
1841 	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
1842 		struct _test t = test[i];
1843 
1844 		if (check_whitelist(&t, opt) != 0)
1845 			continue;
1846 		if (check_blacklist(&t, opt) == 0)
1847 			continue;
1848 
1849 		test_start_subtest(&t, opt);
1850 		t.tester(cg_fd, opt);
1851 		test_end_subtest();
1852 	}
1853 
1854 	return err;
1855 }
1856 
1857 static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
1858 {
1859 	opt->map = BPF_SOCKMAP_FILENAME;
1860 	__test_selftests(cg_fd, opt);
1861 }
1862 
1863 static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
1864 {
1865 	opt->map = BPF_SOCKHASH_FILENAME;
1866 	__test_selftests(cg_fd, opt);
1867 }
1868 
1869 static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
1870 {
1871 	opt->map = BPF_SOCKHASH_FILENAME;
1872 	opt->prepend = "ktls";
1873 	ktls = 1;
1874 	__test_selftests(cg_fd, opt);
1875 	ktls = 0;
1876 }
1877 
/* Run the three selftest passes (sockmap, sockhash, ktls) in that order
 * and print the collected results. Always returns 0.
 */
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
	test_selftests_sockmap(cg_fd, opt);
	test_selftests_sockhash(cg_fd, opt);
	test_selftests_ktls(cg_fd, opt);

	test_print_results();

	return 0;
}
1887 
1888 int main(int argc, char **argv)
1889 {
1890 	int iov_count = 1, length = 1024, rate = 1;
1891 	struct sockmap_options options = {0};
1892 	int opt, longindex, err, cg_fd = 0;
1893 	char *bpf_file = BPF_SOCKMAP_FILENAME;
1894 	int test = SELFTESTS;
1895 	bool cg_created = 0;
1896 
1897 	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
1898 				  long_options, &longindex)) != -1) {
1899 		switch (opt) {
1900 		case 's':
1901 			txmsg_start = atoi(optarg);
1902 			break;
1903 		case 'e':
1904 			txmsg_end = atoi(optarg);
1905 			break;
1906 		case 'p':
1907 			txmsg_start_push = atoi(optarg);
1908 			break;
1909 		case 'q':
1910 			txmsg_end_push = atoi(optarg);
1911 			break;
1912 		case 'w':
1913 			txmsg_start_pop = atoi(optarg);
1914 			break;
1915 		case 'x':
1916 			txmsg_pop = atoi(optarg);
1917 			break;
1918 		case 'a':
1919 			txmsg_apply = atoi(optarg);
1920 			break;
1921 		case 'k':
1922 			txmsg_cork = atoi(optarg);
1923 			break;
1924 		case 'c':
1925 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
1926 			if (cg_fd < 0) {
1927 				fprintf(stderr,
1928 					"ERROR: (%i) open cg path failed: %s\n",
1929 					cg_fd, optarg);
1930 				return cg_fd;
1931 			}
1932 			break;
1933 		case 'r':
1934 			rate = atoi(optarg);
1935 			break;
1936 		case 'v':
1937 			options.verbose = 1;
1938 			if (optarg)
1939 				options.verbose = atoi(optarg);
1940 			break;
1941 		case 'i':
1942 			iov_count = atoi(optarg);
1943 			break;
1944 		case 'l':
1945 			length = atoi(optarg);
1946 			break;
1947 		case 'd':
1948 			options.data_test = true;
1949 			break;
1950 		case 't':
1951 			if (strcmp(optarg, "ping") == 0) {
1952 				test = PING_PONG;
1953 			} else if (strcmp(optarg, "sendmsg") == 0) {
1954 				test = SENDMSG;
1955 			} else if (strcmp(optarg, "base") == 0) {
1956 				test = BASE;
1957 			} else if (strcmp(optarg, "base_sendpage") == 0) {
1958 				test = BASE_SENDPAGE;
1959 			} else if (strcmp(optarg, "sendpage") == 0) {
1960 				test = SENDPAGE;
1961 			} else {
1962 				usage(argv);
1963 				return -1;
1964 			}
1965 			break;
1966 		case 'n':
1967 			options.whitelist = strdup(optarg);
1968 			if (!options.whitelist)
1969 				return -ENOMEM;
1970 			break;
1971 		case 'b':
1972 			options.blacklist = strdup(optarg);
1973 			if (!options.blacklist)
1974 				return -ENOMEM;
1975 		case 0:
1976 			break;
1977 		case 'h':
1978 		default:
1979 			usage(argv);
1980 			return -1;
1981 		}
1982 	}
1983 
1984 	if (!cg_fd) {
1985 		cg_fd = cgroup_setup_and_join(CG_PATH);
1986 		if (cg_fd < 0)
1987 			return cg_fd;
1988 		cg_created = 1;
1989 	}
1990 
1991 	if (test == SELFTESTS) {
1992 		err = test_selftest(cg_fd, &options);
1993 		goto out;
1994 	}
1995 
1996 	err = populate_progs(bpf_file);
1997 	if (err) {
1998 		fprintf(stderr, "populate program: (%s) %s\n",
1999 			bpf_file, strerror(errno));
2000 		return 1;
2001 	}
2002 	running = 1;
2003 
2004 	/* catch SIGINT */
2005 	signal(SIGINT, running_handler);
2006 
2007 	options.iov_count = iov_count;
2008 	options.iov_length = length;
2009 	options.rate = rate;
2010 
2011 	err = run_options(&options, cg_fd, test);
2012 out:
2013 	if (options.whitelist)
2014 		free(options.whitelist);
2015 	if (options.blacklist)
2016 		free(options.blacklist);
2017 	if (cg_created)
2018 		cleanup_cgroup_environment();
2019 	close(cg_fd);
2020 	return err;
2021 }
2022 
2023 void running_handler(int a)
2024 {
2025 	running = 0;
2026 }
2027