1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/socket.h>
6 #include <sys/ioctl.h>
7 #include <sys/select.h>
8 #include <netinet/in.h>
9 #include <arpa/inet.h>
10 #include <unistd.h>
11 #include <string.h>
12 #include <errno.h>
13 #include <stdbool.h>
14 #include <signal.h>
15 #include <fcntl.h>
16 #include <sys/wait.h>
17 #include <time.h>
18 #include <sched.h>
19 
20 #include <sys/time.h>
21 #include <sys/resource.h>
22 #include <sys/types.h>
23 #include <sys/sendfile.h>
24 
25 #include <linux/netlink.h>
26 #include <linux/socket.h>
27 #include <linux/sock_diag.h>
28 #include <linux/bpf.h>
29 #include <linux/if_link.h>
30 #include <linux/tls.h>
31 #include <assert.h>
32 #include <libgen.h>
33 
34 #include <getopt.h>
35 
36 #include <bpf/bpf.h>
37 #include <bpf/libbpf.h>
38 
39 #include "bpf_util.h"
40 #include "bpf_rlimit.h"
41 #include "cgroup_helpers.h"
42 
/* Loop-control flag: forever_ping_pong() keeps iterating while it is non-zero. */
int running;
/* Declared here, defined outside this part of the file; presumably a signal
 * handler that clears `running` -- TODO confirm against its definition.
 */
static void running_handler(int a);
45 
/* Fallback values, used only when the included headers do not already
 * define these socket-option constants.
 */
#ifndef TCP_ULP
# define TCP_ULP 31
#endif
#ifndef SOL_TLS
# define SOL_TLS 282
#endif

/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001

/* test_end_subtest() compares env.type against BPF_SOCKMAP_FILENAME to
 * decide whether a result line is labelled "sockmap" or "sockhash".
 */
#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
#define CG_PATH "/sockmap"
60 
/* global sockets */
/* Set up by sockmap_init_sockets(): s1/s2 listen on S1_PORT/S2_PORT,
 * c1/c2 connect to them, and p1/p2 are the connections accepted on s1/s2.
 */
int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
/* BPF map descriptors, indexed the way run_options() uses them:
 * [0] sockmap with the stream parser/verdict programs, [1] txmsg sockmap,
 * [2] redirect targets, [3] apply bytes, [4] cork bytes,
 * [5] start/end/push/pop offsets (keys 0-5), [6] txmsg ingress flag,
 * [7] skb flags, [8] TLS sockmap.
 */
int map_fd[9];
struct bpf_map *maps[9];
/* BPF program descriptors as used by run_options(): [0] stream parser,
 * [1] stream verdict, [2] TLS stream verdict, [3] sock_ops,
 * [4]-[8] txmsg pass/redir/apply/cork/drop programs.
 */
int prog_fd[11];
69 
/* Test knobs. Several are set through long_options below; run_options(),
 * sendmsg_test(), msg_loop() and msg_verify_data() read them.
 * test_reset() clears all of them except ktls and peek_flag.
 */
int txmsg_pass;
int txmsg_redir;
int txmsg_drop;
int txmsg_apply;
int txmsg_cork;
int txmsg_start;
int txmsg_end;
int txmsg_start_push;
int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
int txmsg_redir_skb;
int txmsg_ktls_skb;
int txmsg_ktls_skb_drop;
int txmsg_ktls_skb_redir;
/* When set, sendmsg_test() enables kTLS on the sockets it uses. */
int ktls;
/* When set, msg_loop() issues a MSG_PEEK read before each real read. */
int peek_flag;
int skb_use_parser;
89 
/* getopt_long() option table, also walked by usage().
 * Entries with a non-NULL flag pointer store 1 into the named global;
 * the remaining entries carry the short-option character in `val`.
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"cgroup",	required_argument,	NULL, 'c' },
	{"rate",	required_argument,	NULL, 'r' },
	{"verbose",	optional_argument,	NULL, 'v' },
	{"iov_count",	required_argument,	NULL, 'i' },
	{"length",	required_argument,	NULL, 'l' },
	{"test",	required_argument,	NULL, 't' },
	{"data_test",   no_argument,		NULL, 'd' },
	{"txmsg",		no_argument,	&txmsg_pass,  1  },
	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
	{"txmsg_apply",	required_argument,	NULL, 'a'},
	{"txmsg_cork",	required_argument,	NULL, 'k'},
	{"txmsg_start", required_argument,	NULL, 's'},
	{"txmsg_end",	required_argument,	NULL, 'e'},
	{"txmsg_start_push", required_argument,	NULL, 'p'},
	{"txmsg_end_push",   required_argument,	NULL, 'q'},
	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
	{"txmsg_pop",	     required_argument,	NULL, 'x'},
	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
	{"ktls", no_argument,			&ktls, 1 },
	{"peek", no_argument,			&peek_flag, 1 },
	{"whitelist", required_argument,	NULL, 'n' },
	{"blacklist", required_argument,	NULL, 'b' },
	/* all-zero terminator */
	{0, 0, NULL, 0 }
};
118 
/* Bookkeeping for the test currently running plus overall pass/fail totals. */
struct test_env {
	const char *type;	/* options->map; compared with BPF_SOCKMAP_FILENAME when printing */
	const char *subtest;	/* title of the current struct _test */
	const char *prepend;	/* optional prefix printed in the result line */

	int test_num;		/* incremented by test_start_subtest() */
	int subtest_num;	/* zeroed per test, incremented by test_start() */

	int succ_cnt;		/* incremented by test_pass() */
	int fail_cnt;		/* incremented by test_fail() */
	int fail_last;		/* fail_cnt snapshot taken when the current test began */
};

/* Single global instance used by the test_*() helpers. */
struct test_env env;
133 
/* Per-run settings handed to run_options() and the message loops. */
struct sockmap_options {
	int verbose;		/* non-zero enables progress output; > 1 adds more detail */
	bool base;		/* sendmsg_test(): receive on p1 when true, p2 otherwise */
	bool sendpage;		/* transmit with msg_loop_sendpage() instead of msg_loop() */
	bool data_test;		/* fill the payload with a byte pattern and verify it on receive */
	bool drop_expected;	/* sends are expected to fail; set by run_options() when txmsg_drop */
	int iov_count;		/* iovecs per message */
	int iov_length;		/* bytes per iovec */
	int rate;		/* message count in sendmsg_test(); sleep seconds in forever_ping_pong() */
	char *map;		/* copied into env.type by test_start_subtest() */
	char *whitelist;
	char *blacklist;
	char *prepend;		/* copied into env.prepend by test_start_subtest() */
};
148 
/* A named test case and the function that runs it. */
struct _test {
	char *title;	/* printed as the subtest name by test_end_subtest() */
	void (*tester)(int cg_fd, struct sockmap_options *opt);
};
153 
154 static void test_start(void)
155 {
156 	env.subtest_num++;
157 }
158 
159 static void test_fail(void)
160 {
161 	env.fail_cnt++;
162 }
163 
164 static void test_pass(void)
165 {
166 	env.succ_cnt++;
167 }
168 
169 static void test_reset(void)
170 {
171 	txmsg_start = txmsg_end = 0;
172 	txmsg_start_pop = txmsg_pop = 0;
173 	txmsg_start_push = txmsg_end_push = 0;
174 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
175 	txmsg_apply = txmsg_cork = 0;
176 	txmsg_ingress = txmsg_redir_skb = 0;
177 	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
178 	skb_use_parser = 0;
179 }
180 
181 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
182 {
183 	env.type = o->map;
184 	env.subtest = t->title;
185 	env.prepend = o->prepend;
186 	env.test_num++;
187 	env.subtest_num = 0;
188 	env.fail_last = env.fail_cnt;
189 	test_reset();
190 	return 0;
191 }
192 
193 static void test_end_subtest(void)
194 {
195 	int error = env.fail_cnt - env.fail_last;
196 	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
197 
198 	if (!error)
199 		test_pass();
200 
201 	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
202 		env.test_num, env.subtest_num,
203 		!type ? "sockmap" : "sockhash",
204 		env.prepend ? : "",
205 		env.subtest, error ? "FAIL" : "OK");
206 }
207 
208 static void test_print_results(void)
209 {
210 	fprintf(stdout, "Pass: %d Fail: %d\n",
211 		env.succ_cnt, env.fail_cnt);
212 }
213 
214 static void usage(char *argv[])
215 {
216 	int i;
217 
218 	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
219 	printf(" options:\n");
220 	for (i = 0; long_options[i].name != 0; i++) {
221 		printf(" --%-12s", long_options[i].name);
222 		if (long_options[i].flag != NULL)
223 			printf(" flag (internal value:%d)\n",
224 				*long_options[i].flag);
225 		else
226 			printf(" -%c\n", long_options[i].val);
227 	}
228 	printf("\n");
229 }
230 
231 char *sock_to_string(int s)
232 {
233 	if (s == c1)
234 		return "client1";
235 	else if (s == c2)
236 		return "client2";
237 	else if (s == s1)
238 		return "server1";
239 	else if (s == s2)
240 		return "server2";
241 	else if (s == p1)
242 		return "peer1";
243 	else if (s == p2)
244 		return "peer2";
245 	else
246 		return "unknown";
247 }
248 
249 static int sockmap_init_ktls(int verbose, int s)
250 {
251 	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
252 		.info = {
253 			.version     = TLS_1_2_VERSION,
254 			.cipher_type = TLS_CIPHER_AES_GCM_128,
255 		},
256 	};
257 	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
258 		.info = {
259 			.version     = TLS_1_2_VERSION,
260 			.cipher_type = TLS_CIPHER_AES_GCM_128,
261 		},
262 	};
263 	int so_buf = 6553500;
264 	int err;
265 
266 	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
267 	if (err) {
268 		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
269 		return -EINVAL;
270 	}
271 	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
272 	if (err) {
273 		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
274 		return -EINVAL;
275 	}
276 	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
277 	if (err) {
278 		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
279 		return -EINVAL;
280 	}
281 	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
282 	if (err) {
283 		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
284 		return -EINVAL;
285 	}
286 	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
287 	if (err) {
288 		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
289 		return -EINVAL;
290 	}
291 
292 	if (verbose)
293 		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
294 	return 0;
295 }
296 static int sockmap_init_sockets(int verbose)
297 {
298 	int i, err, one = 1;
299 	struct sockaddr_in addr;
300 	int *fds[4] = {&s1, &s2, &c1, &c2};
301 
302 	s1 = s2 = p1 = p2 = c1 = c2 = 0;
303 
304 	/* Init sockets */
305 	for (i = 0; i < 4; i++) {
306 		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
307 		if (*fds[i] < 0) {
308 			perror("socket s1 failed()");
309 			return errno;
310 		}
311 	}
312 
313 	/* Allow reuse */
314 	for (i = 0; i < 2; i++) {
315 		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
316 				 (char *)&one, sizeof(one));
317 		if (err) {
318 			perror("setsockopt failed()");
319 			return errno;
320 		}
321 	}
322 
323 	/* Non-blocking sockets */
324 	for (i = 0; i < 2; i++) {
325 		err = ioctl(*fds[i], FIONBIO, (char *)&one);
326 		if (err < 0) {
327 			perror("ioctl s1 failed()");
328 			return errno;
329 		}
330 	}
331 
332 	/* Bind server sockets */
333 	memset(&addr, 0, sizeof(struct sockaddr_in));
334 	addr.sin_family = AF_INET;
335 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
336 
337 	addr.sin_port = htons(S1_PORT);
338 	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
339 	if (err < 0) {
340 		perror("bind s1 failed()");
341 		return errno;
342 	}
343 
344 	addr.sin_port = htons(S2_PORT);
345 	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
346 	if (err < 0) {
347 		perror("bind s2 failed()");
348 		return errno;
349 	}
350 
351 	/* Listen server sockets */
352 	addr.sin_port = htons(S1_PORT);
353 	err = listen(s1, 32);
354 	if (err < 0) {
355 		perror("listen s1 failed()");
356 		return errno;
357 	}
358 
359 	addr.sin_port = htons(S2_PORT);
360 	err = listen(s2, 32);
361 	if (err < 0) {
362 		perror("listen s1 failed()");
363 		return errno;
364 	}
365 
366 	/* Initiate Connect */
367 	addr.sin_port = htons(S1_PORT);
368 	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
369 	if (err < 0 && errno != EINPROGRESS) {
370 		perror("connect c1 failed()");
371 		return errno;
372 	}
373 
374 	addr.sin_port = htons(S2_PORT);
375 	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
376 	if (err < 0 && errno != EINPROGRESS) {
377 		perror("connect c2 failed()");
378 		return errno;
379 	} else if (err < 0) {
380 		err = 0;
381 	}
382 
383 	/* Accept Connecrtions */
384 	p1 = accept(s1, NULL, NULL);
385 	if (p1 < 0) {
386 		perror("accept s1 failed()");
387 		return errno;
388 	}
389 
390 	p2 = accept(s2, NULL, NULL);
391 	if (p2 < 0) {
392 		perror("accept s1 failed()");
393 		return errno;
394 	}
395 
396 	if (verbose > 1) {
397 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
398 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
399 			c1, s1, c2, s2);
400 	}
401 	return 0;
402 }
403 
/* Byte counters and CLOCK_MONOTONIC timestamps filled in by msg_loop()
 * and msg_loop_sendpage().
 */
struct msg_stats {
	size_t bytes_sent;	/* sum of positive send results */
	size_t bytes_recvd;	/* sum of receive results */
	struct timespec start;	/* taken just before the transfer loop */
	struct timespec end;	/* taken when the loop finishes (and on most receive-side errors) */
};
410 
411 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
412 			     struct msg_stats *s,
413 			     struct sockmap_options *opt)
414 {
415 	bool drop = opt->drop_expected;
416 	unsigned char k = 0;
417 	FILE *file;
418 	int i, fp;
419 
420 	file = tmpfile();
421 	if (!file) {
422 		perror("create file for sendpage");
423 		return 1;
424 	}
425 	for (i = 0; i < iov_length * cnt; i++, k++)
426 		fwrite(&k, sizeof(char), 1, file);
427 	fflush(file);
428 	fseek(file, 0, SEEK_SET);
429 
430 	fp = fileno(file);
431 
432 	clock_gettime(CLOCK_MONOTONIC, &s->start);
433 	for (i = 0; i < cnt; i++) {
434 		int sent;
435 
436 		errno = 0;
437 		sent = sendfile(fd, fp, NULL, iov_length);
438 
439 		if (!drop && sent < 0) {
440 			perror("sendpage loop error");
441 			fclose(file);
442 			return sent;
443 		} else if (drop && sent >= 0) {
444 			printf("sendpage loop error expected: %i errno %i\n",
445 			       sent, errno);
446 			fclose(file);
447 			return -EIO;
448 		}
449 
450 		if (sent > 0)
451 			s->bytes_sent += sent;
452 	}
453 	clock_gettime(CLOCK_MONOTONIC, &s->end);
454 	fclose(file);
455 	return 0;
456 }
457 
/* Release every buffer referenced by @msg's iovec array, then the array
 * itself, and leave @msg with no iovecs (msg_iov NULL, msg_iovlen 0).
 */
static void msg_free_iov(struct msghdr *msg)
{
	struct iovec *vec = msg->msg_iov;
	size_t idx = 0;

	while (idx < msg->msg_iovlen) {
		free(vec[idx].iov_base);
		idx++;
	}
	free(vec);

	msg->msg_iovlen = 0;
	msg->msg_iov = NULL;
}
468 
/* Allocate @iov_count zeroed buffers of @iov_length bytes each and attach
 * them to @msg (msg_iov / msg_iovlen).
 *
 * When both @data and @xmit are set, the buffers are filled with a byte
 * pattern that counts 0,1,2,... continuously across buffers and wraps
 * at 256.
 *
 * Returns 0 on success and -ENOMEM on allocation failure. On failure
 * everything allocated here is released and @msg is left untouched.
 * Release a successful allocation with msg_free_iov().
 */
static int msg_alloc_iov(struct msghdr *msg,
			 int iov_count, int iov_length,
			 bool data, bool xmit)
{
	unsigned char k = 0;
	struct iovec *iov;
	int i;

	iov = calloc(iov_count, sizeof(struct iovec));
	if (!iov)
		return -ENOMEM;

	for (i = 0; i < iov_count; i++) {
		unsigned char *d = calloc(iov_length, sizeof(char));

		if (!d) {
			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
			goto unwind_iov;
		}
		iov[i].iov_base = d;
		iov[i].iov_len = iov_length;

		if (data && xmit) {
			int j;

			for (j = 0; j < iov_length; j++)
				d[j] = k++;
		}
	}

	msg->msg_iov = iov;
	msg->msg_iovlen = iov_count;

	return 0;
unwind_iov:
	/* Free through the local array: msg->msg_iov is not assigned yet. */
	for (i--; i >= 0 ; i--)
		free(iov[i].iov_base);
	free(iov);
	return -ENOMEM;
}
508 
509 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
510 {
511 	int i, j = 0, bytes_cnt = 0;
512 	unsigned char k = 0;
513 
514 	for (i = 0; i < msg->msg_iovlen; i++) {
515 		unsigned char *d = msg->msg_iov[i].iov_base;
516 
517 		/* Special case test for skb ingress + ktls */
518 		if (i == 0 && txmsg_ktls_skb) {
519 			if (msg->msg_iov[i].iov_len < 4)
520 				return -EIO;
521 			if (txmsg_ktls_skb_redir) {
522 				if (memcmp(&d[13], "PASS", 4) != 0) {
523 					fprintf(stderr,
524 						"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
525 					return -EIO;
526 				}
527 				d[13] = 0;
528 				d[14] = 1;
529 				d[15] = 2;
530 				d[16] = 3;
531 				j = 13;
532 			} else if (txmsg_ktls_skb) {
533 				if (memcmp(d, "PASS", 4) != 0) {
534 					fprintf(stderr,
535 						"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
536 					return -EIO;
537 				}
538 				d[0] = 0;
539 				d[1] = 1;
540 				d[2] = 2;
541 				d[3] = 3;
542 			}
543 		}
544 
545 		for (; j < msg->msg_iov[i].iov_len && size; j++) {
546 			if (d[j] != k++) {
547 				fprintf(stderr,
548 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
549 					i, j, d[j], k - 1, d[j+1], k);
550 				return -EIO;
551 			}
552 			bytes_cnt++;
553 			if (bytes_cnt == chunk_sz) {
554 				k = 0;
555 				bytes_cnt = 0;
556 			}
557 			size--;
558 		}
559 	}
560 	return 0;
561 }
562 
563 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
564 		    struct msg_stats *s, bool tx,
565 		    struct sockmap_options *opt)
566 {
567 	struct msghdr msg = {0}, msg_peek = {0};
568 	int err, i, flags = MSG_NOSIGNAL;
569 	bool drop = opt->drop_expected;
570 	bool data = opt->data_test;
571 
572 	err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
573 	if (err)
574 		goto out_errno;
575 	if (peek_flag) {
576 		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
577 		if (err)
578 			goto out_errno;
579 	}
580 
581 	if (tx) {
582 		clock_gettime(CLOCK_MONOTONIC, &s->start);
583 		for (i = 0; i < cnt; i++) {
584 			int sent;
585 
586 			errno = 0;
587 			sent = sendmsg(fd, &msg, flags);
588 
589 			if (!drop && sent < 0) {
590 				perror("sendmsg loop error");
591 				goto out_errno;
592 			} else if (drop && sent >= 0) {
593 				fprintf(stderr,
594 					"sendmsg loop error expected: %i errno %i\n",
595 					sent, errno);
596 				errno = -EIO;
597 				goto out_errno;
598 			}
599 			if (sent > 0)
600 				s->bytes_sent += sent;
601 		}
602 		clock_gettime(CLOCK_MONOTONIC, &s->end);
603 	} else {
604 		int slct, recvp = 0, recv, max_fd = fd;
605 		float total_bytes, txmsg_pop_total;
606 		int fd_flags = O_NONBLOCK;
607 		struct timeval timeout;
608 		fd_set w;
609 
610 		fcntl(fd, fd_flags);
611 		/* Account for pop bytes noting each iteration of apply will
612 		 * call msg_pop_data helper so we need to account for this
613 		 * by calculating the number of apply iterations. Note user
614 		 * of the tool can create cases where no data is sent by
615 		 * manipulating pop/push/pull/etc. For example txmsg_apply 1
616 		 * with txmsg_pop 1 will try to apply 1B at a time but each
617 		 * iteration will then pop 1B so no data will ever be sent.
618 		 * This is really only useful for testing edge cases in code
619 		 * paths.
620 		 */
621 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
622 		if (txmsg_apply)
623 			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
624 		else
625 			txmsg_pop_total = txmsg_pop * cnt;
626 		total_bytes -= txmsg_pop_total;
627 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
628 		if (err < 0)
629 			perror("recv start time");
630 		while (s->bytes_recvd < total_bytes) {
631 			if (txmsg_cork) {
632 				timeout.tv_sec = 0;
633 				timeout.tv_usec = 300000;
634 			} else {
635 				timeout.tv_sec = 3;
636 				timeout.tv_usec = 0;
637 			}
638 
639 			/* FD sets */
640 			FD_ZERO(&w);
641 			FD_SET(fd, &w);
642 
643 			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
644 			if (slct == -1) {
645 				perror("select()");
646 				clock_gettime(CLOCK_MONOTONIC, &s->end);
647 				goto out_errno;
648 			} else if (!slct) {
649 				if (opt->verbose)
650 					fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
651 				errno = -EIO;
652 				clock_gettime(CLOCK_MONOTONIC, &s->end);
653 				goto out_errno;
654 			}
655 
656 			errno = 0;
657 			if (peek_flag) {
658 				flags |= MSG_PEEK;
659 				recvp = recvmsg(fd, &msg_peek, flags);
660 				if (recvp < 0) {
661 					if (errno != EWOULDBLOCK) {
662 						clock_gettime(CLOCK_MONOTONIC, &s->end);
663 						goto out_errno;
664 					}
665 				}
666 				flags = 0;
667 			}
668 
669 			recv = recvmsg(fd, &msg, flags);
670 			if (recv < 0) {
671 				if (errno != EWOULDBLOCK) {
672 					clock_gettime(CLOCK_MONOTONIC, &s->end);
673 					perror("recv failed()");
674 					goto out_errno;
675 				}
676 			}
677 
678 			s->bytes_recvd += recv;
679 
680 			if (data) {
681 				int chunk_sz = opt->sendpage ?
682 						iov_length * cnt :
683 						iov_length * iov_count;
684 
685 				errno = msg_verify_data(&msg, recv, chunk_sz);
686 				if (errno) {
687 					perror("data verify msg failed");
688 					goto out_errno;
689 				}
690 				if (recvp) {
691 					errno = msg_verify_data(&msg_peek,
692 								recvp,
693 								chunk_sz);
694 					if (errno) {
695 						perror("data verify msg_peek failed");
696 						goto out_errno;
697 					}
698 				}
699 			}
700 		}
701 		clock_gettime(CLOCK_MONOTONIC, &s->end);
702 	}
703 
704 	msg_free_iov(&msg);
705 	msg_free_iov(&msg_peek);
706 	return err;
707 out_errno:
708 	msg_free_iov(&msg);
709 	msg_free_iov(&msg_peek);
710 	return errno;
711 }
712 
713 static float giga = 1000000000;
714 
715 static inline float sentBps(struct msg_stats s)
716 {
717 	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
718 }
719 
720 static inline float recvdBps(struct msg_stats s)
721 {
722 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
723 }
724 
725 static int sendmsg_test(struct sockmap_options *opt)
726 {
727 	float sent_Bps = 0, recvd_Bps = 0;
728 	int rx_fd, txpid, rxpid, err = 0;
729 	struct msg_stats s = {0};
730 	int iov_count = opt->iov_count;
731 	int iov_buf = opt->iov_length;
732 	int rx_status, tx_status;
733 	int cnt = opt->rate;
734 
735 	errno = 0;
736 
737 	if (opt->base)
738 		rx_fd = p1;
739 	else
740 		rx_fd = p2;
741 
742 	if (ktls) {
743 		/* Redirecting into non-TLS socket which sends into a TLS
744 		 * socket is not a valid test. So in this case lets not
745 		 * enable kTLS but still run the test.
746 		 */
747 		if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
748 			err = sockmap_init_ktls(opt->verbose, rx_fd);
749 			if (err)
750 				return err;
751 		}
752 		err = sockmap_init_ktls(opt->verbose, c1);
753 		if (err)
754 			return err;
755 	}
756 
757 	rxpid = fork();
758 	if (rxpid == 0) {
759 		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
760 		if (opt->drop_expected || txmsg_ktls_skb_drop)
761 			_exit(0);
762 
763 		if (!iov_buf) /* zero bytes sent case */
764 			_exit(0);
765 
766 		if (opt->sendpage)
767 			iov_count = 1;
768 		err = msg_loop(rx_fd, iov_count, iov_buf,
769 			       cnt, &s, false, opt);
770 		if (opt->verbose > 1)
771 			fprintf(stderr,
772 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
773 				iov_count, iov_buf, cnt, err);
774 		if (s.end.tv_sec - s.start.tv_sec) {
775 			sent_Bps = sentBps(s);
776 			recvd_Bps = recvdBps(s);
777 		}
778 		if (opt->verbose > 1)
779 			fprintf(stdout,
780 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
781 				s.bytes_sent, sent_Bps, sent_Bps/giga,
782 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
783 				peek_flag ? "(peek_msg)" : "");
784 		if (err && txmsg_cork)
785 			err = 0;
786 		exit(err ? 1 : 0);
787 	} else if (rxpid == -1) {
788 		perror("msg_loop_rx");
789 		return errno;
790 	}
791 
792 	txpid = fork();
793 	if (txpid == 0) {
794 		if (opt->sendpage)
795 			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
796 		else
797 			err = msg_loop(c1, iov_count, iov_buf,
798 				       cnt, &s, true, opt);
799 
800 		if (err)
801 			fprintf(stderr,
802 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
803 				iov_count, iov_buf, cnt, err);
804 		if (s.end.tv_sec - s.start.tv_sec) {
805 			sent_Bps = sentBps(s);
806 			recvd_Bps = recvdBps(s);
807 		}
808 		if (opt->verbose > 1)
809 			fprintf(stdout,
810 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
811 				s.bytes_sent, sent_Bps, sent_Bps/giga,
812 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
813 		exit(err ? 1 : 0);
814 	} else if (txpid == -1) {
815 		perror("msg_loop_tx");
816 		return errno;
817 	}
818 
819 	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
820 	assert(waitpid(txpid, &tx_status, 0) == txpid);
821 	if (WIFEXITED(rx_status)) {
822 		err = WEXITSTATUS(rx_status);
823 		if (err) {
824 			fprintf(stderr, "rx thread exited with err %d.\n", err);
825 			goto out;
826 		}
827 	}
828 	if (WIFEXITED(tx_status)) {
829 		err = WEXITSTATUS(tx_status);
830 		if (err)
831 			fprintf(stderr, "tx thread exited with err %d.\n", err);
832 	}
833 out:
834 	return err;
835 }
836 
837 static int forever_ping_pong(int rate, struct sockmap_options *opt)
838 {
839 	struct timeval timeout;
840 	char buf[1024] = {0};
841 	int sc;
842 
843 	timeout.tv_sec = 10;
844 	timeout.tv_usec = 0;
845 
846 	/* Ping/Pong data from client to server */
847 	sc = send(c1, buf, sizeof(buf), 0);
848 	if (sc < 0) {
849 		perror("send failed()");
850 		return sc;
851 	}
852 
853 	do {
854 		int s, rc, i, max_fd = p2;
855 		fd_set w;
856 
857 		/* FD sets */
858 		FD_ZERO(&w);
859 		FD_SET(c1, &w);
860 		FD_SET(c2, &w);
861 		FD_SET(p1, &w);
862 		FD_SET(p2, &w);
863 
864 		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
865 		if (s == -1) {
866 			perror("select()");
867 			break;
868 		} else if (!s) {
869 			fprintf(stderr, "unexpected timeout\n");
870 			break;
871 		}
872 
873 		for (i = 0; i <= max_fd && s > 0; ++i) {
874 			if (!FD_ISSET(i, &w))
875 				continue;
876 
877 			s--;
878 
879 			rc = recv(i, buf, sizeof(buf), 0);
880 			if (rc < 0) {
881 				if (errno != EWOULDBLOCK) {
882 					perror("recv failed()");
883 					return rc;
884 				}
885 			}
886 
887 			if (rc == 0) {
888 				close(i);
889 				break;
890 			}
891 
892 			sc = send(i, buf, rc, 0);
893 			if (sc < 0) {
894 				perror("send failed()");
895 				return sc;
896 			}
897 		}
898 
899 		if (rate)
900 			sleep(rate);
901 
902 		if (opt->verbose) {
903 			printf(".");
904 			fflush(stdout);
905 
906 		}
907 	} while (running);
908 
909 	return 0;
910 }
911 
/* Values for the `test` argument of run_options(). run_options() handles
 * PING_PONG, SENDMSG, BASE, BASE_SENDPAGE and SENDPAGE; it does not
 * dispatch SELFTESTS.
 */
enum {
	SELFTESTS,
	PING_PONG,	/* forever_ping_pong() */
	SENDMSG,	/* sendmsg_test() with BPF programs attached */
	BASE,		/* sendmsg_test() without the BPF program setup */
	BASE_SENDPAGE,	/* as BASE, transmitting with sendfile() */
	SENDPAGE,	/* as SENDMSG, transmitting with sendfile() */
};
920 
921 static int run_options(struct sockmap_options *options, int cg_fd,  int test)
922 {
923 	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
924 
925 	/* If base test skip BPF setup */
926 	if (test == BASE || test == BASE_SENDPAGE)
927 		goto run;
928 
929 	/* Attach programs to sockmap */
930 	err = bpf_prog_attach(prog_fd[0], map_fd[0],
931 				BPF_SK_SKB_STREAM_PARSER, 0);
932 	if (err) {
933 		fprintf(stderr,
934 			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
935 			prog_fd[0], map_fd[0], err, strerror(errno));
936 		return err;
937 	}
938 
939 	err = bpf_prog_attach(prog_fd[1], map_fd[0],
940 				BPF_SK_SKB_STREAM_VERDICT, 0);
941 	if (err) {
942 		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
943 			err, strerror(errno));
944 		return err;
945 	}
946 
947 	/* Attach programs to TLS sockmap */
948 	if (txmsg_ktls_skb) {
949 		err = bpf_prog_attach(prog_fd[0], map_fd[8],
950 					BPF_SK_SKB_STREAM_PARSER, 0);
951 		if (err) {
952 			fprintf(stderr,
953 				"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
954 				prog_fd[0], map_fd[8], err, strerror(errno));
955 			return err;
956 		}
957 
958 		err = bpf_prog_attach(prog_fd[2], map_fd[8],
959 				      BPF_SK_SKB_STREAM_VERDICT, 0);
960 		if (err) {
961 			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
962 				err, strerror(errno));
963 			return err;
964 		}
965 	}
966 
967 	/* Attach to cgroups */
968 	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
969 	if (err) {
970 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
971 			err, strerror(errno));
972 		return err;
973 	}
974 
975 run:
976 	err = sockmap_init_sockets(options->verbose);
977 	if (err) {
978 		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
979 		goto out;
980 	}
981 
982 	/* Attach txmsg program to sockmap */
983 	if (txmsg_pass)
984 		tx_prog_fd = prog_fd[4];
985 	else if (txmsg_redir)
986 		tx_prog_fd = prog_fd[5];
987 	else if (txmsg_apply)
988 		tx_prog_fd = prog_fd[6];
989 	else if (txmsg_cork)
990 		tx_prog_fd = prog_fd[7];
991 	else if (txmsg_drop)
992 		tx_prog_fd = prog_fd[8];
993 	else
994 		tx_prog_fd = 0;
995 
996 	if (tx_prog_fd) {
997 		int redir_fd, i = 0;
998 
999 		err = bpf_prog_attach(tx_prog_fd,
1000 				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
1001 		if (err) {
1002 			fprintf(stderr,
1003 				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
1004 				err, strerror(errno));
1005 			goto out;
1006 		}
1007 
1008 		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
1009 		if (err) {
1010 			fprintf(stderr,
1011 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1012 				err, strerror(errno));
1013 			goto out;
1014 		}
1015 
1016 		if (txmsg_redir)
1017 			redir_fd = c2;
1018 		else
1019 			redir_fd = c1;
1020 
1021 		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
1022 		if (err) {
1023 			fprintf(stderr,
1024 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1025 				err, strerror(errno));
1026 			goto out;
1027 		}
1028 
1029 		if (txmsg_apply) {
1030 			err = bpf_map_update_elem(map_fd[3],
1031 						  &i, &txmsg_apply, BPF_ANY);
1032 			if (err) {
1033 				fprintf(stderr,
1034 					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
1035 					err, strerror(errno));
1036 				goto out;
1037 			}
1038 		}
1039 
1040 		if (txmsg_cork) {
1041 			err = bpf_map_update_elem(map_fd[4],
1042 						  &i, &txmsg_cork, BPF_ANY);
1043 			if (err) {
1044 				fprintf(stderr,
1045 					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
1046 					err, strerror(errno));
1047 				goto out;
1048 			}
1049 		}
1050 
1051 		if (txmsg_start) {
1052 			err = bpf_map_update_elem(map_fd[5],
1053 						  &i, &txmsg_start, BPF_ANY);
1054 			if (err) {
1055 				fprintf(stderr,
1056 					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
1057 					err, strerror(errno));
1058 				goto out;
1059 			}
1060 		}
1061 
1062 		if (txmsg_end) {
1063 			i = 1;
1064 			err = bpf_map_update_elem(map_fd[5],
1065 						  &i, &txmsg_end, BPF_ANY);
1066 			if (err) {
1067 				fprintf(stderr,
1068 					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
1069 					err, strerror(errno));
1070 				goto out;
1071 			}
1072 		}
1073 
1074 		if (txmsg_start_push) {
1075 			i = 2;
1076 			err = bpf_map_update_elem(map_fd[5],
1077 						  &i, &txmsg_start_push, BPF_ANY);
1078 			if (err) {
1079 				fprintf(stderr,
1080 					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
1081 					err, strerror(errno));
1082 				goto out;
1083 			}
1084 		}
1085 
1086 		if (txmsg_end_push) {
1087 			i = 3;
1088 			err = bpf_map_update_elem(map_fd[5],
1089 						  &i, &txmsg_end_push, BPF_ANY);
1090 			if (err) {
1091 				fprintf(stderr,
1092 					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
1093 					txmsg_end_push, i, err, strerror(errno));
1094 				goto out;
1095 			}
1096 		}
1097 
1098 		if (txmsg_start_pop) {
1099 			i = 4;
1100 			err = bpf_map_update_elem(map_fd[5],
1101 						  &i, &txmsg_start_pop, BPF_ANY);
1102 			if (err) {
1103 				fprintf(stderr,
1104 					"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop):  %d (%s)\n",
1105 					txmsg_start_pop, i, err, strerror(errno));
1106 				goto out;
1107 			}
1108 		} else {
1109 			i = 4;
1110 			bpf_map_update_elem(map_fd[5],
1111 						  &i, &txmsg_start_pop, BPF_ANY);
1112 		}
1113 
1114 		if (txmsg_pop) {
1115 			i = 5;
1116 			err = bpf_map_update_elem(map_fd[5],
1117 						  &i, &txmsg_pop, BPF_ANY);
1118 			if (err) {
1119 				fprintf(stderr,
1120 					"ERROR: bpf_map_update_elem %i@%i (txmsg_pop):  %d (%s)\n",
1121 					txmsg_pop, i, err, strerror(errno));
1122 				goto out;
1123 			}
1124 		} else {
1125 			i = 5;
1126 			bpf_map_update_elem(map_fd[5],
1127 					    &i, &txmsg_pop, BPF_ANY);
1128 
1129 		}
1130 
1131 		if (txmsg_ingress) {
1132 			int in = BPF_F_INGRESS;
1133 
1134 			i = 0;
1135 			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
1136 			if (err) {
1137 				fprintf(stderr,
1138 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1139 					err, strerror(errno));
1140 			}
1141 			i = 1;
1142 			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
1143 			if (err) {
1144 				fprintf(stderr,
1145 					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
1146 					err, strerror(errno));
1147 			}
1148 			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
1149 			if (err) {
1150 				fprintf(stderr,
1151 					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
1152 					err, strerror(errno));
1153 			}
1154 
1155 			i = 2;
1156 			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
1157 			if (err) {
1158 				fprintf(stderr,
1159 					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
1160 					err, strerror(errno));
1161 			}
1162 		}
1163 
1164 		if (txmsg_ktls_skb) {
1165 			int ingress = BPF_F_INGRESS;
1166 
1167 			i = 0;
1168 			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
1169 			if (err) {
1170 				fprintf(stderr,
1171 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1172 					err, strerror(errno));
1173 			}
1174 
1175 			if (txmsg_ktls_skb_redir) {
1176 				i = 1;
1177 				err = bpf_map_update_elem(map_fd[7],
1178 							  &i, &ingress, BPF_ANY);
1179 				if (err) {
1180 					fprintf(stderr,
1181 						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1182 						err, strerror(errno));
1183 				}
1184 			}
1185 
1186 			if (txmsg_ktls_skb_drop) {
1187 				i = 1;
1188 				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
1189 			}
1190 		}
1191 
1192 		if (txmsg_redir_skb) {
1193 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
1194 					p2 : p1;
1195 			int ingress = BPF_F_INGRESS;
1196 
1197 			i = 0;
1198 			err = bpf_map_update_elem(map_fd[7],
1199 						  &i, &ingress, BPF_ANY);
1200 			if (err) {
1201 				fprintf(stderr,
1202 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1203 					err, strerror(errno));
1204 			}
1205 
1206 			i = 3;
1207 			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
1208 			if (err) {
1209 				fprintf(stderr,
1210 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1211 					err, strerror(errno));
1212 			}
1213 		}
1214 	}
1215 
1216 	if (skb_use_parser) {
1217 		i = 2;
1218 		err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
1219 	}
1220 
1221 	if (txmsg_drop)
1222 		options->drop_expected = true;
1223 
1224 	if (test == PING_PONG)
1225 		err = forever_ping_pong(options->rate, options);
1226 	else if (test == SENDMSG) {
1227 		options->base = false;
1228 		options->sendpage = false;
1229 		err = sendmsg_test(options);
1230 	} else if (test == SENDPAGE) {
1231 		options->base = false;
1232 		options->sendpage = true;
1233 		err = sendmsg_test(options);
1234 	} else if (test == BASE) {
1235 		options->base = true;
1236 		options->sendpage = false;
1237 		err = sendmsg_test(options);
1238 	} else if (test == BASE_SENDPAGE) {
1239 		options->base = true;
1240 		options->sendpage = true;
1241 		err = sendmsg_test(options);
1242 	} else
1243 		fprintf(stderr, "unknown test\n");
1244 out:
	/* Detach and zero all the maps */
1246 	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
1247 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
1248 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
1249 	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
1250 	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
1251 
1252 	if (tx_prog_fd >= 0)
1253 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
1254 
1255 	for (i = 0; i < 8; i++) {
1256 		key = next_key = 0;
1257 		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1258 		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
1259 			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1260 			key = next_key;
1261 		}
1262 	}
1263 
1264 	close(s1);
1265 	close(s2);
1266 	close(p1);
1267 	close(p2);
1268 	close(c1);
1269 	close(c2);
1270 	return err;
1271 }
1272 
1273 static char *test_to_str(int test)
1274 {
1275 	switch (test) {
1276 	case SENDMSG:
1277 		return "sendmsg";
1278 	case SENDPAGE:
1279 		return "sendpage";
1280 	}
1281 	return "unknown";
1282 }
1283 
1284 #define OPTSTRING 60
1285 static void test_options(char *options)
1286 {
1287 	char tstr[OPTSTRING];
1288 
1289 	memset(options, 0, OPTSTRING);
1290 
1291 	if (txmsg_pass)
1292 		strncat(options, "pass,", OPTSTRING);
1293 	if (txmsg_redir)
1294 		strncat(options, "redir,", OPTSTRING);
1295 	if (txmsg_drop)
1296 		strncat(options, "drop,", OPTSTRING);
1297 	if (txmsg_apply) {
1298 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
1299 		strncat(options, tstr, OPTSTRING);
1300 	}
1301 	if (txmsg_cork) {
1302 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
1303 		strncat(options, tstr, OPTSTRING);
1304 	}
1305 	if (txmsg_start) {
1306 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
1307 		strncat(options, tstr, OPTSTRING);
1308 	}
1309 	if (txmsg_end) {
1310 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
1311 		strncat(options, tstr, OPTSTRING);
1312 	}
1313 	if (txmsg_start_pop) {
1314 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
1315 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
1316 		strncat(options, tstr, OPTSTRING);
1317 	}
1318 	if (txmsg_ingress)
1319 		strncat(options, "ingress,", OPTSTRING);
1320 	if (txmsg_redir_skb)
1321 		strncat(options, "redir_skb,", OPTSTRING);
1322 	if (txmsg_ktls_skb)
1323 		strncat(options, "ktls_skb,", OPTSTRING);
1324 	if (ktls)
1325 		strncat(options, "ktls,", OPTSTRING);
1326 	if (peek_flag)
1327 		strncat(options, "peek,", OPTSTRING);
1328 }
1329 
1330 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
1331 {
1332 	char *options = calloc(OPTSTRING, sizeof(char));
1333 	int err;
1334 
1335 	if (test == SENDPAGE)
1336 		opt->sendpage = true;
1337 	else
1338 		opt->sendpage = false;
1339 
1340 	if (txmsg_drop)
1341 		opt->drop_expected = true;
1342 	else
1343 		opt->drop_expected = false;
1344 
1345 	test_options(options);
1346 
1347 	if (opt->verbose) {
1348 		fprintf(stdout,
1349 			" [TEST %i]: (%i, %i, %i, %s, %s): ",
1350 			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
1351 			test_to_str(test), options);
1352 		fflush(stdout);
1353 	}
1354 	err = run_options(opt, cgrp, test);
1355 	if (opt->verbose)
1356 		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
1357 	test_cnt++;
1358 	!err ? passed++ : failed++;
1359 	free(options);
1360 	return err;
1361 }
1362 
1363 static void test_exec(int cgrp, struct sockmap_options *opt)
1364 {
1365 	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
1366 	int err;
1367 
1368 	if (type == 0) {
1369 		test_start();
1370 		err = __test_exec(cgrp, SENDMSG, opt);
1371 		if (err)
1372 			test_fail();
1373 	} else {
1374 		test_start();
1375 		err = __test_exec(cgrp, SENDPAGE, opt);
1376 		if (err)
1377 			test_fail();
1378 	}
1379 }
1380 
1381 static void test_send_one(struct sockmap_options *opt, int cgrp)
1382 {
1383 	opt->iov_length = 1;
1384 	opt->iov_count = 1;
1385 	opt->rate = 1;
1386 	test_exec(cgrp, opt);
1387 
1388 	opt->iov_length = 1;
1389 	opt->iov_count = 1024;
1390 	opt->rate = 1;
1391 	test_exec(cgrp, opt);
1392 
1393 	opt->iov_length = 1024;
1394 	opt->iov_count = 1;
1395 	opt->rate = 1;
1396 	test_exec(cgrp, opt);
1397 
1398 }
1399 
1400 static void test_send_many(struct sockmap_options *opt, int cgrp)
1401 {
1402 	opt->iov_length = 3;
1403 	opt->iov_count = 1;
1404 	opt->rate = 512;
1405 	test_exec(cgrp, opt);
1406 
1407 	opt->rate = 100;
1408 	opt->iov_count = 1;
1409 	opt->iov_length = 5;
1410 	test_exec(cgrp, opt);
1411 }
1412 
1413 static void test_send_large(struct sockmap_options *opt, int cgrp)
1414 {
1415 	opt->iov_length = 256;
1416 	opt->iov_count = 1024;
1417 	opt->rate = 2;
1418 	test_exec(cgrp, opt);
1419 }
1420 
1421 static void test_send(struct sockmap_options *opt, int cgrp)
1422 {
1423 	test_send_one(opt, cgrp);
1424 	test_send_many(opt, cgrp);
1425 	test_send_large(opt, cgrp);
1426 	sched_yield();
1427 }
1428 
1429 static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
1430 {
1431 	/* Test small and large iov_count values with pass/redir/apply/cork */
1432 	txmsg_pass = 1;
1433 	test_send(opt, cgrp);
1434 }
1435 
1436 static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
1437 {
1438 	txmsg_redir = 1;
1439 	test_send(opt, cgrp);
1440 }
1441 
1442 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
1443 {
1444 	txmsg_drop = 1;
1445 	test_send(opt, cgrp);
1446 }
1447 
1448 static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
1449 {
1450 	txmsg_pass = txmsg_drop = 0;
1451 	txmsg_ingress = txmsg_redir = 1;
1452 	test_send(opt, cgrp);
1453 }
1454 
1455 static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
1456 {
1457 	bool data = opt->data_test;
1458 	int k = ktls;
1459 
1460 	opt->data_test = true;
1461 	ktls = 1;
1462 
1463 	txmsg_pass = txmsg_drop = 0;
1464 	txmsg_ingress = txmsg_redir = 0;
1465 	txmsg_ktls_skb = 1;
1466 	txmsg_pass = 1;
1467 
1468 	/* Using data verification so ensure iov layout is
1469 	 * expected from test receiver side. e.g. has enough
1470 	 * bytes to write test code.
1471 	 */
1472 	opt->iov_length = 100;
1473 	opt->iov_count = 1;
1474 	opt->rate = 1;
1475 	test_exec(cgrp, opt);
1476 
1477 	txmsg_ktls_skb_drop = 1;
1478 	test_exec(cgrp, opt);
1479 
1480 	txmsg_ktls_skb_drop = 0;
1481 	txmsg_ktls_skb_redir = 1;
1482 	test_exec(cgrp, opt);
1483 
1484 	opt->data_test = data;
1485 	ktls = k;
1486 }
1487 
1488 
1489 /* Test cork with hung data. This tests poor usage patterns where
1490  * cork can leave data on the ring if user program is buggy and
1491  * doesn't flush them somehow. They do take some time however
1492  * because they wait for a timeout. Test pass, redir and cork with
1493  * apply logic. Use cork size of 4097 with send_large to avoid
1494  * aligning cork size with send size.
1495  */
1496 static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
1497 {
1498 	txmsg_pass = 1;
1499 	txmsg_redir = 0;
1500 	txmsg_cork = 4097;
1501 	txmsg_apply = 4097;
1502 	test_send_large(opt, cgrp);
1503 
1504 	txmsg_pass = 0;
1505 	txmsg_redir = 1;
1506 	txmsg_apply = 0;
1507 	txmsg_cork = 4097;
1508 	test_send_large(opt, cgrp);
1509 
1510 	txmsg_pass = 0;
1511 	txmsg_redir = 1;
1512 	txmsg_apply = 4097;
1513 	txmsg_cork = 4097;
1514 	test_send_large(opt, cgrp);
1515 }
1516 
1517 static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
1518 {
1519 	/* Test basic start/end */
1520 	txmsg_start = 1;
1521 	txmsg_end = 2;
1522 	test_send(opt, cgrp);
1523 
1524 	/* Test >4k pull */
1525 	txmsg_start = 4096;
1526 	txmsg_end = 9182;
1527 	test_send_large(opt, cgrp);
1528 
1529 	/* Test pull + redirect */
1530 	txmsg_redir = 0;
1531 	txmsg_start = 1;
1532 	txmsg_end = 2;
1533 	test_send(opt, cgrp);
1534 
1535 	/* Test pull + cork */
1536 	txmsg_redir = 0;
1537 	txmsg_cork = 512;
1538 	txmsg_start = 1;
1539 	txmsg_end = 2;
1540 	test_send_many(opt, cgrp);
1541 
1542 	/* Test pull + cork + redirect */
1543 	txmsg_redir = 1;
1544 	txmsg_cork = 512;
1545 	txmsg_start = 1;
1546 	txmsg_end = 2;
1547 	test_send_many(opt, cgrp);
1548 }
1549 
1550 static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
1551 {
1552 	/* Test basic pop */
1553 	txmsg_start_pop = 1;
1554 	txmsg_pop = 2;
1555 	test_send_many(opt, cgrp);
1556 
1557 	/* Test pop with >4k */
1558 	txmsg_start_pop = 4096;
1559 	txmsg_pop = 4096;
1560 	test_send_large(opt, cgrp);
1561 
1562 	/* Test pop + redirect */
1563 	txmsg_redir = 1;
1564 	txmsg_start_pop = 1;
1565 	txmsg_pop = 2;
1566 	test_send_many(opt, cgrp);
1567 
1568 	/* Test pop + cork */
1569 	txmsg_redir = 0;
1570 	txmsg_cork = 512;
1571 	txmsg_start_pop = 1;
1572 	txmsg_pop = 2;
1573 	test_send_many(opt, cgrp);
1574 
1575 	/* Test pop + redirect + cork */
1576 	txmsg_redir = 1;
1577 	txmsg_cork = 4;
1578 	txmsg_start_pop = 1;
1579 	txmsg_pop = 2;
1580 	test_send_many(opt, cgrp);
1581 }
1582 
1583 static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
1584 {
1585 	/* Test basic push */
1586 	txmsg_start_push = 1;
1587 	txmsg_end_push = 1;
1588 	test_send(opt, cgrp);
1589 
1590 	/* Test push 4kB >4k */
1591 	txmsg_start_push = 4096;
1592 	txmsg_end_push = 4096;
1593 	test_send_large(opt, cgrp);
1594 
1595 	/* Test push + redirect */
1596 	txmsg_redir = 1;
1597 	txmsg_start_push = 1;
1598 	txmsg_end_push = 2;
1599 	test_send_many(opt, cgrp);
1600 
1601 	/* Test push + cork */
1602 	txmsg_redir = 0;
1603 	txmsg_cork = 512;
1604 	txmsg_start_push = 1;
1605 	txmsg_end_push = 2;
1606 	test_send_many(opt, cgrp);
1607 }
1608 
1609 static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
1610 {
1611 	txmsg_start_push = 1;
1612 	txmsg_end_push = 10;
1613 	txmsg_start_pop = 5;
1614 	txmsg_pop = 4;
1615 	test_send_large(opt, cgrp);
1616 }
1617 
1618 static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
1619 {
1620 	txmsg_pass = 1;
1621 	txmsg_redir = 0;
1622 	txmsg_apply = 1;
1623 	txmsg_cork = 0;
1624 	test_send_one(opt, cgrp);
1625 
1626 	txmsg_pass = 0;
1627 	txmsg_redir = 1;
1628 	txmsg_apply = 1;
1629 	txmsg_cork = 0;
1630 	test_send_one(opt, cgrp);
1631 
1632 	txmsg_pass = 1;
1633 	txmsg_redir = 0;
1634 	txmsg_apply = 1024;
1635 	txmsg_cork = 0;
1636 	test_send_large(opt, cgrp);
1637 
1638 	txmsg_pass = 0;
1639 	txmsg_redir = 1;
1640 	txmsg_apply = 1024;
1641 	txmsg_cork = 0;
1642 	test_send_large(opt, cgrp);
1643 }
1644 
1645 static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
1646 {
1647 	txmsg_pass = 1;
1648 	txmsg_redir = 0;
1649 	txmsg_apply = 0;
1650 	txmsg_cork = 1;
1651 	test_send(opt, cgrp);
1652 
1653 	txmsg_pass = 1;
1654 	txmsg_redir = 0;
1655 	txmsg_apply = 1;
1656 	txmsg_cork = 1;
1657 	test_send(opt, cgrp);
1658 }
1659 
1660 static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
1661 {
1662 	txmsg_pass = 1;
1663 	skb_use_parser = 512;
1664 	opt->iov_length = 256;
1665 	opt->iov_count = 1;
1666 	opt->rate = 2;
1667 	test_exec(cgrp, opt);
1668 }
1669 
/* BPF map names looked up by populate_progs(). The position in this
 * table is the index used for the matching entry in map_fd[] and
 * maps[], so the order must not change.
 */
char *map_names[] = {
	[0] = "sock_map",
	[1] = "sock_map_txmsg",
	[2] = "sock_map_redir",
	[3] = "sock_apply_bytes",
	[4] = "sock_cork_bytes",
	[5] = "sock_bytes",
	[6] = "sock_redir_flags",
	[7] = "sock_skb_opts",
	[8] = "tls_sock_map",
};
1681 
1682 int prog_attach_type[] = {
1683 	BPF_SK_SKB_STREAM_PARSER,
1684 	BPF_SK_SKB_STREAM_VERDICT,
1685 	BPF_SK_SKB_STREAM_VERDICT,
1686 	BPF_CGROUP_SOCK_OPS,
1687 	BPF_SK_MSG_VERDICT,
1688 	BPF_SK_MSG_VERDICT,
1689 	BPF_SK_MSG_VERDICT,
1690 	BPF_SK_MSG_VERDICT,
1691 	BPF_SK_MSG_VERDICT,
1692 	BPF_SK_MSG_VERDICT,
1693 	BPF_SK_MSG_VERDICT,
1694 };
1695 
1696 int prog_type[] = {
1697 	BPF_PROG_TYPE_SK_SKB,
1698 	BPF_PROG_TYPE_SK_SKB,
1699 	BPF_PROG_TYPE_SK_SKB,
1700 	BPF_PROG_TYPE_SOCK_OPS,
1701 	BPF_PROG_TYPE_SK_MSG,
1702 	BPF_PROG_TYPE_SK_MSG,
1703 	BPF_PROG_TYPE_SK_MSG,
1704 	BPF_PROG_TYPE_SK_MSG,
1705 	BPF_PROG_TYPE_SK_MSG,
1706 	BPF_PROG_TYPE_SK_MSG,
1707 	BPF_PROG_TYPE_SK_MSG,
1708 };
1709 
1710 static int populate_progs(char *bpf_file)
1711 {
1712 	struct bpf_program *prog;
1713 	struct bpf_object *obj;
1714 	int i = 0;
1715 	long err;
1716 
1717 	obj = bpf_object__open(bpf_file);
1718 	err = libbpf_get_error(obj);
1719 	if (err) {
1720 		char err_buf[256];
1721 
1722 		libbpf_strerror(err, err_buf, sizeof(err_buf));
1723 		printf("Unable to load eBPF objects in file '%s' : %s\n",
1724 		       bpf_file, err_buf);
1725 		return -1;
1726 	}
1727 
1728 	bpf_object__for_each_program(prog, obj) {
1729 		bpf_program__set_type(prog, prog_type[i]);
1730 		bpf_program__set_expected_attach_type(prog,
1731 						      prog_attach_type[i]);
1732 		i++;
1733 	}
1734 
1735 	i = bpf_object__load(obj);
1736 	i = 0;
1737 	bpf_object__for_each_program(prog, obj) {
1738 		prog_fd[i] = bpf_program__fd(prog);
1739 		i++;
1740 	}
1741 
1742 	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
1743 		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1744 		map_fd[i] = bpf_map__fd(maps[i]);
1745 		if (map_fd[i] < 0) {
1746 			fprintf(stderr, "load_bpf_file: (%i) %s\n",
1747 				map_fd[i], strerror(errno));
1748 			return -1;
1749 		}
1750 	}
1751 
1752 	return 0;
1753 }
1754 
1755 struct _test test[] = {
1756 	{"txmsg test passthrough", test_txmsg_pass},
1757 	{"txmsg test redirect", test_txmsg_redir},
1758 	{"txmsg test drop", test_txmsg_drop},
1759 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
1760 	{"txmsg test skb", test_txmsg_skb},
1761 	{"txmsg test apply", test_txmsg_apply},
1762 	{"txmsg test cork", test_txmsg_cork},
1763 	{"txmsg test hanging corks", test_txmsg_cork_hangs},
1764 	{"txmsg test push_data", test_txmsg_push},
1765 	{"txmsg test pull-data", test_txmsg_pull},
1766 	{"txmsg test pop-data", test_txmsg_pop},
1767 	{"txmsg test push/pop data", test_txmsg_push_pop},
1768 	{"txmsg text ingress parser", test_txmsg_ingress_parser},
1769 };
1770 
1771 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
1772 {
1773 	char *entry, *ptr;
1774 
1775 	if (!opt->whitelist)
1776 		return 0;
1777 	ptr = strdup(opt->whitelist);
1778 	if (!ptr)
1779 		return -ENOMEM;
1780 	entry = strtok(ptr, ",");
1781 	while (entry) {
1782 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1783 		    strstr(opt->map, entry) != 0 ||
1784 		    strstr(t->title, entry) != 0)
1785 			return 0;
1786 		entry = strtok(NULL, ",");
1787 	}
1788 	return -EINVAL;
1789 }
1790 
1791 static int check_blacklist(struct _test *t, struct sockmap_options *opt)
1792 {
1793 	char *entry, *ptr;
1794 
1795 	if (!opt->blacklist)
1796 		return -EINVAL;
1797 	ptr = strdup(opt->blacklist);
1798 	if (!ptr)
1799 		return -ENOMEM;
1800 	entry = strtok(ptr, ",");
1801 	while (entry) {
1802 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1803 		    strstr(opt->map, entry) != 0 ||
1804 		    strstr(t->title, entry) != 0)
1805 			return 0;
1806 		entry = strtok(NULL, ",");
1807 	}
1808 	return -EINVAL;
1809 }
1810 
1811 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
1812 {
1813 	int i, err;
1814 
1815 	err = populate_progs(opt->map);
1816 	if (err < 0) {
1817 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1818 		return err;
1819 	}
1820 
1821 	/* Tests basic commands and APIs */
1822 	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
1823 		struct _test t = test[i];
1824 
1825 		if (check_whitelist(&t, opt) != 0)
1826 			continue;
1827 		if (check_blacklist(&t, opt) == 0)
1828 			continue;
1829 
1830 		test_start_subtest(&t, opt);
1831 		t.tester(cg_fd, opt);
1832 		test_end_subtest();
1833 	}
1834 
1835 	return err;
1836 }
1837 
1838 static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
1839 {
1840 	opt->map = BPF_SOCKMAP_FILENAME;
1841 	__test_selftests(cg_fd, opt);
1842 }
1843 
1844 static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
1845 {
1846 	opt->map = BPF_SOCKHASH_FILENAME;
1847 	__test_selftests(cg_fd, opt);
1848 }
1849 
1850 static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
1851 {
1852 	opt->map = BPF_SOCKHASH_FILENAME;
1853 	opt->prepend = "ktls";
1854 	ktls = 1;
1855 	__test_selftests(cg_fd, opt);
1856 	ktls = 0;
1857 }
1858 
1859 static int test_selftest(int cg_fd, struct sockmap_options *opt)
1860 {
1861 
1862 	test_selftests_sockmap(cg_fd, opt);
1863 	test_selftests_sockhash(cg_fd, opt);
1864 	test_selftests_ktls(cg_fd, opt);
1865 	test_print_results();
1866 	return 0;
1867 }
1868 
1869 int main(int argc, char **argv)
1870 {
1871 	int iov_count = 1, length = 1024, rate = 1;
1872 	struct sockmap_options options = {0};
1873 	int opt, longindex, err, cg_fd = 0;
1874 	char *bpf_file = BPF_SOCKMAP_FILENAME;
1875 	int test = SELFTESTS;
1876 	bool cg_created = 0;
1877 
1878 	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
1879 				  long_options, &longindex)) != -1) {
1880 		switch (opt) {
1881 		case 's':
1882 			txmsg_start = atoi(optarg);
1883 			break;
1884 		case 'e':
1885 			txmsg_end = atoi(optarg);
1886 			break;
1887 		case 'p':
1888 			txmsg_start_push = atoi(optarg);
1889 			break;
1890 		case 'q':
1891 			txmsg_end_push = atoi(optarg);
1892 			break;
1893 		case 'w':
1894 			txmsg_start_pop = atoi(optarg);
1895 			break;
1896 		case 'x':
1897 			txmsg_pop = atoi(optarg);
1898 			break;
1899 		case 'a':
1900 			txmsg_apply = atoi(optarg);
1901 			break;
1902 		case 'k':
1903 			txmsg_cork = atoi(optarg);
1904 			break;
1905 		case 'c':
1906 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
1907 			if (cg_fd < 0) {
1908 				fprintf(stderr,
1909 					"ERROR: (%i) open cg path failed: %s\n",
1910 					cg_fd, optarg);
1911 				return cg_fd;
1912 			}
1913 			break;
1914 		case 'r':
1915 			rate = atoi(optarg);
1916 			break;
1917 		case 'v':
1918 			options.verbose = 1;
1919 			if (optarg)
1920 				options.verbose = atoi(optarg);
1921 			break;
1922 		case 'i':
1923 			iov_count = atoi(optarg);
1924 			break;
1925 		case 'l':
1926 			length = atoi(optarg);
1927 			break;
1928 		case 'd':
1929 			options.data_test = true;
1930 			break;
1931 		case 't':
1932 			if (strcmp(optarg, "ping") == 0) {
1933 				test = PING_PONG;
1934 			} else if (strcmp(optarg, "sendmsg") == 0) {
1935 				test = SENDMSG;
1936 			} else if (strcmp(optarg, "base") == 0) {
1937 				test = BASE;
1938 			} else if (strcmp(optarg, "base_sendpage") == 0) {
1939 				test = BASE_SENDPAGE;
1940 			} else if (strcmp(optarg, "sendpage") == 0) {
1941 				test = SENDPAGE;
1942 			} else {
1943 				usage(argv);
1944 				return -1;
1945 			}
1946 			break;
1947 		case 'n':
1948 			options.whitelist = strdup(optarg);
1949 			if (!options.whitelist)
1950 				return -ENOMEM;
1951 			break;
1952 		case 'b':
1953 			options.blacklist = strdup(optarg);
1954 			if (!options.blacklist)
1955 				return -ENOMEM;
1956 		case 0:
1957 			break;
1958 		case 'h':
1959 		default:
1960 			usage(argv);
1961 			return -1;
1962 		}
1963 	}
1964 
1965 	if (!cg_fd) {
1966 		cg_fd = cgroup_setup_and_join(CG_PATH);
1967 		if (cg_fd < 0)
1968 			return cg_fd;
1969 		cg_created = 1;
1970 	}
1971 
1972 	if (test == SELFTESTS) {
1973 		err = test_selftest(cg_fd, &options);
1974 		goto out;
1975 	}
1976 
1977 	err = populate_progs(bpf_file);
1978 	if (err) {
1979 		fprintf(stderr, "populate program: (%s) %s\n",
1980 			bpf_file, strerror(errno));
1981 		return 1;
1982 	}
1983 	running = 1;
1984 
1985 	/* catch SIGINT */
1986 	signal(SIGINT, running_handler);
1987 
1988 	options.iov_count = iov_count;
1989 	options.iov_length = length;
1990 	options.rate = rate;
1991 
1992 	err = run_options(&options, cg_fd, test);
1993 out:
1994 	if (options.whitelist)
1995 		free(options.whitelist);
1996 	if (options.blacklist)
1997 		free(options.blacklist);
1998 	if (cg_created)
1999 		cleanup_cgroup_environment();
2000 	close(cg_fd);
2001 	return err;
2002 }
2003 
2004 void running_handler(int a)
2005 {
2006 	running = 0;
2007 }
2008